"""
Test if we can recover the leveldb from OSD after where all leveldbs are
corrupted.
"""

import logging
import os
import shutil
import tempfile

import ceph_manager
from teuthology import misc as teuthology

# Module-level logger named after this module, per teuthology convention.
log = logging.getLogger(__name__)
def _push_directory(path, remote, remote_dir):
    """
    Copy the contents of local directory ``path`` into ``remote_dir`` on
    ``remote``.  Equivalent shell pipeline:

    local_temp_path=`mktemp`
    tar czf $local_temp_path $path
    ssh remote mkdir -p remote_dir
    remote_temp_path=`mktemp`
    scp $local_temp_path $remote_temp_path
    rm $local_temp_path
    tar xzf $remote_temp_path -C $remote_dir
    ssh remote rm $remote_temp_path

    :param path: local directory whose contents are pushed
    :param remote: teuthology remote to push to
    :param remote_dir: destination directory on the remote (must exist)
    """
    fd, local_temp_path = tempfile.mkstemp(suffix='.tgz',
                                           prefix='rebuild_mondb-')
    # mkstemp returns an open fd we never use; close it so the file is
    # only manipulated via its path.
    os.close(fd)
    cmd = ' '.join(['tar', 'cz',
                    '-f', local_temp_path,
                    '-C', path,
                    '--', '.'])
    teuthology.sh(cmd)
    _, fname = os.path.split(local_temp_path)
    fd, remote_temp_path = tempfile.mkstemp(suffix='.tgz',
                                            prefix='rebuild_mondb-')
    os.close(fd)
    remote.put_file(local_temp_path, remote_temp_path)
    os.remove(local_temp_path)
    # unpack as root: remote_dir may be root-owned (e.g. /var/lib/ceph)
    remote.run(args=['sudo',
                     'tar', 'xz',
                     '-C', remote_dir,
                     '-f', remote_temp_path])
    remote.run(args=['sudo', 'rm', '-fr', remote_temp_path])
def _nuke_mons(manager, mons, mon_id):
    """
    Kill all monitors and destroy their on-disk state.

    The monitor whose id equals ``mon_id`` keeps its data directory and
    only loses its ``store.db``; every other monitor loses its whole data
    directory.  This way only the first mon's store needs to be recreated
    later from the recovered store.

    :param manager: CephManager used to kill the mon daemons
    :param mons: cluster subset containing the mon remotes
    :param mon_id: id of the mon preserved for recovery
    """
    assert mons
    is_mon = teuthology.is_type('mon')
    for remote, roles in mons.remotes.iteritems():
        for role in roles:
            if not is_mon(role):
                continue
            cluster, _, m = teuthology.split_role(role)
            log.info('killing {cluster}:mon.{mon}'.format(
                cluster=cluster,
                mon=m))
            manager.kill_mon(m)
            mon_data = os.path.join('/var/lib/ceph/mon/',
                                    '{0}-{1}'.format(cluster, m))
            if m == mon_id:
                # so we will only need to recreate the store.db for the
                # first mon, would be easier than mkfs on it then replace
                # the its store.db with the recovered one
                store_dir = os.path.join(mon_data, 'store.db')
                remote.run(args=['sudo', 'rm', '-r', store_dir])
            else:
                remote.run(args=['sudo', 'rm', '-r', mon_data])
def _rebuild_db(ctx, manager, cluster_name, mon, mon_id, keyring_path):
    """
    Rebuild the mon store from the cluster's OSDs and install it on
    ``mon``.

    For each OSD in turn, the partially accumulated store is pushed to
    the OSD host, updated via ``ceph-objectstore-tool --op
    update-mon-db``, and pulled back.  The final store is then pushed to
    the mon host, the keyring caps are filled in, and
    ``ceph-monstore-tool ... rebuild`` produces a usable store.db.

    :param ctx: teuthology run context
    :param manager: CephManager wrapping objectstore-tool invocations
    :param cluster_name: name of the cluster being recovered
    :param mon: remote of the mon that will be re-seeded
    :param mon_id: id of that mon
    :param keyring_path: path of the keyring used for the rebuild
    """
    local_mstore = tempfile.mkdtemp()

    # collect the maps from all OSDs
    is_osd = teuthology.is_type('osd')
    osds = ctx.cluster.only(is_osd)
    assert osds
    for osd, roles in osds.remotes.iteritems():
        for role in roles:
            if not is_osd(role):
                continue
            cluster, _, osd_id = teuthology.split_role(role)
            assert cluster_name == cluster
            log.info('collecting maps from {cluster}:osd.{osd}'.format(
                cluster=cluster,
                osd=osd_id))
            # push the accumulated mon store so far onto the OSD host
            osd_mstore = os.path.join(teuthology.get_testdir(ctx),
                                      'mon-store')
            osd.run(args=['sudo', 'mkdir', '-m', 'o+x', '-p', osd_mstore])
            _push_directory(local_mstore, osd, osd_mstore)
            log.info('rm -rf {0}'.format(local_mstore))
            shutil.rmtree(local_mstore)
            # update leveldb with OSD data
            options = '--op update-mon-db --mon-store-path {0}'
            log.info('cot {0}'.format(osd_mstore))
            manager.objectstore_tool(pool=None,
                                     options=options.format(osd_mstore),
                                     osd=osd_id)
            # pull the updated mon db
            log.info('pull dir {0} -> {1}'.format(osd_mstore, local_mstore))
            local_mstore = tempfile.mkdtemp()
            teuthology.pull_directory(osd, osd_mstore, local_mstore)
            log.info('rm -rf osd:{0}'.format(osd_mstore))
            osd.run(args=['sudo', 'rm', '-fr', osd_mstore])

    # recover the first_mon with re-built mon db
    # pull from recovered leveldb from client
    mon_store_dir = os.path.join('/var/lib/ceph/mon',
                                 '{0}-{1}'.format(cluster_name, mon_id))
    _push_directory(local_mstore, mon, mon_store_dir)
    mon.run(args=['sudo', 'chown', '-R', 'ceph:ceph', mon_store_dir])
    shutil.rmtree(local_mstore)

    # fill up the caps in the keyring file
    mon.run(args=['sudo',
                  'ceph-authtool', keyring_path,
                  '-n', 'mon.',
                  '--cap', 'mon', 'allow *'])
    mon.run(args=['sudo',
                  'ceph-authtool', keyring_path,
                  '-n', 'client.admin',
                  '--cap', 'mon', 'allow *',
                  '--cap', 'osd', 'allow *',
                  '--cap', 'mds', 'allow *',
                  '--cap', 'mgr', 'allow *'])
    mon.run(args=['sudo', '-u', 'ceph',
                  'ceph-monstore-tool', mon_store_dir,
                  'rebuild', '--', '--keyring',
                  keyring_path])
def _revive_mons(manager, mons, recovered, keyring_path):
    """
    Bring all monitors back up and wait for them to form a quorum.

    Every mon except ``recovered`` (whose store was just rebuilt) is
    re-created with ``ceph-mon --mkfs`` first; the initial monmap is in
    the ceph.conf, so we are good.

    :param manager: CephManager used to revive the mon daemons
    :param mons: cluster subset containing the mon remotes
    :param recovered: id of the mon already seeded with the rebuilt store
    :param keyring_path: keyring handed to ``ceph-mon --mkfs``
    """
    n_mons = 0
    is_mon = teuthology.is_type('mon')
    for remote, roles in mons.remotes.iteritems():
        for role in roles:
            if not is_mon(role):
                continue
            cluster, _, m = teuthology.split_role(role)
            if recovered != m:
                log.info('running mkfs on {cluster}:mon.{mon}'.format(
                    cluster=cluster,
                    mon=m))
                remote.run(
                    args=[
                        'sudo',
                        'ceph-mon',
                        '--cluster', cluster,
                        '--mkfs',
                        '-i', m,
                        '--keyring', keyring_path])
            log.info('reviving mon.{0}'.format(m))
            manager.revive_mon(m)
            n_mons += 1
    manager.wait_for_mon_quorum_size(n_mons, timeout=30)
def _revive_mgrs(ctx, manager):
    """
    Restart every mgr daemon in the cluster.

    :param ctx: teuthology run context
    :param manager: CephManager used to revive the mgr daemons
    """
    is_mgr = teuthology.is_type('mgr')
    mgrs = ctx.cluster.only(is_mgr)
    for _, roles in mgrs.remotes.iteritems():
        for role in roles:
            if not is_mgr(role):
                continue
            _, _, mgr_id = teuthology.split_role(role)
            log.info('reviving mgr.{0}'.format(mgr_id))
            manager.revive_mgr(mgr_id)
def _revive_osds(ctx, manager):
    """
    Restart every OSD daemon in the cluster.

    :param ctx: teuthology run context
    :param manager: CephManager used to revive the OSD daemons
    """
    is_osd = teuthology.is_type('osd')
    osds = ctx.cluster.only(is_osd)
    for _, roles in osds.remotes.iteritems():
        for role in roles:
            if not is_osd(role):
                continue
            _, _, osd_id = teuthology.split_role(role)
            log.info('reviving osd.{0}'.format(osd_id))
            manager.revive_osd(osd_id)
def task(ctx, config):
    """
    Test monitor recovery from OSD: wipe all mon stores, rebuild the mon
    db from the OSDs, then revive mons, mgrs and OSDs.

    Accepted config keys:
      keyring_path: keyring used for the rebuild
                    (default: /etc/ceph/{cluster}.keyring)

    :param ctx: teuthology run context
    :param config: task configuration dict (or None)
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'task only accepts a dict for configuration'

    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'))

    mons = ctx.cluster.only(teuthology.is_type('mon'))
    # note down the first cluster_name and mon_id
    # we will recover it later on
    cluster_name, _, mon_id = teuthology.split_role(first_mon)
    _nuke_mons(manager, mons, mon_id)
    default_keyring = '/etc/ceph/{cluster}.keyring'.format(
        cluster=cluster_name)
    keyring_path = config.get('keyring_path', default_keyring)
    _rebuild_db(ctx, manager, cluster_name, mon, mon_id, keyring_path)
    _revive_mons(manager, mons, mon_id, keyring_path)
    _revive_mgrs(ctx, manager)
    _revive_osds(ctx, manager)