# ceph/qa/tasks/rebuild_mondb.py
"""
Test if we can recover the mon leveldb from the OSDs after all leveldbs
have been corrupted.
"""
import logging
import os
import shutil
import subprocess
import tempfile

import ceph_manager
from teuthology import misc as teuthology

log = logging.getLogger(__name__)


def push_directory(path, remote, remote_dir):
    """
    Push the contents of a local directory to a directory on a remote host.
    Roughly equivalent to:

        local_temp_path=`mktemp`
        tar czf $local_temp_path $path
        ssh remote mkdir -p remote_dir
        remote_temp_path=`mktemp`
        scp $local_temp_path $remote_temp_path
        rm $local_temp_path
        tar xzf $remote_temp_path -C $remote_dir
        ssh remote rm $remote_temp_path
    """
    fd, local_temp_path = tempfile.mkstemp(suffix='.tgz',
                                           prefix='rebuild_mondb-')
    os.close(fd)
    # tar up the local directory ...
    cmd = ' '.join(['tar', 'cz',
                    '-f', local_temp_path,
                    '-C', path,
                    '--', '.'])
    subprocess.check_call(cmd, shell=True)
    # ... then copy the tarball over and unpack it on the remote host
    _, fname = os.path.split(local_temp_path)
    fd, remote_temp_path = tempfile.mkstemp(suffix='.tgz',
                                            prefix='rebuild_mondb-')
    os.close(fd)
    remote.put_file(local_temp_path, remote_temp_path)
    os.remove(local_temp_path)
    remote.run(args=['sudo',
                     'tar', 'xz',
                     '-C', remote_dir,
                     '-f', remote_temp_path])
    remote.run(args=['sudo', 'rm', '-fr', remote_temp_path])
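

# A high-level sketch of what task() below does (this mirrors the code that
# follows, it is not an authoritative description of the general mon-recovery
# procedure):
#   1. stop every monitor and wipe its store; only the first mon keeps the
#      rest of its data directory,
#   2. visit every OSD, running ceph-objectstore-tool --op update-mon-db to
#      accumulate the cluster maps into a scratch mon store,
#   3. push the accumulated store to the first mon, fill in the keyring caps,
#      and run 'ceph-monstore-tool ... rebuild' against it,
#   4. mkfs the remaining mons, revive all mons, wait for quorum, then revive
#      the OSDs.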
def task(ctx, config):
    """
    Test monitor recovery from OSD
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'task only accepts a dict for configuration'

    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()

    manager = ceph_manager.CephManager(
        mon,
        ctx=ctx,
        logger=log.getChild('ceph_manager'))

    mons = ctx.cluster.only(teuthology.is_type('mon'))
    # note down the first cluster_name and mon_id;
    # we will recover that mon later on
    cluster_name = None
    mon_id = None
    for remote, roles in mons.remotes.iteritems():
        is_mon = teuthology.is_type('mon')
        for role in roles:
            if not is_mon(role):
                continue
            cluster, _, m = teuthology.split_role(role)
            if cluster_name is None:
                cluster_name = cluster
                mon_id = m
            assert cluster_name == cluster
            log.info('killing {cluster}:mon.{mon}'.format(
                cluster=cluster,
                mon=m))
            manager.kill_mon(m)
            mon_data = os.path.join('/var/lib/ceph/mon/',
                                    '{0}-{1}'.format(cluster_name, m))
            if m == mon_id:
                # so we will only need to recreate the store.db for the
                # first mon, which is easier than running mkfs on it and
                # then replacing its store.db with the recovered one
                store_dir = os.path.join(mon_data, 'store.db')
                remote.run(args=['sudo', 'rm', '-r', store_dir])
            else:
                remote.run(args=['sudo', 'rm', '-r', mon_data])

    local_mstore = tempfile.mkdtemp()

    # collect the maps from all OSDs
    osds = ctx.cluster.only(teuthology.is_type('osd'))
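    # Note (explanatory, based on how the loop below uses the tools): each
    # OSD only stores the cluster maps it has itself received, so the
    # scratch mon store is shuttled to every OSD in turn and
    # 'ceph-objectstore-tool --op update-mon-db' is run against each one,
    # accumulating a complete set of maps before the mon store is rebuilt.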
    for osd, roles in osds.remotes.iteritems():
        is_osd = teuthology.is_type('osd')
        for role in roles:
            if not is_osd(role):
                continue
            cluster, _, osd_id = teuthology.split_role(role)
            assert cluster_name == cluster
            log.info('collecting maps from {cluster}:osd.{osd}'.format(
                cluster=cluster,
                osd=osd_id))
            # push leveldb to OSD
            osd_mstore = os.path.join(teuthology.get_testdir(ctx), 'mon-store')
            osd.run(args=['sudo', 'mkdir', '-m', 'o+x', '-p', osd_mstore])

            push_directory(local_mstore, osd, osd_mstore)
            log.info('rm -rf {0}'.format(local_mstore))
            shutil.rmtree(local_mstore)
            # update leveldb with OSD data ('cot' below is short for
            # ceph-objectstore-tool)
            options = '--op update-mon-db --mon-store-path {0}'
            log.info('cot {0}'.format(osd_mstore))
            manager.objectstore_tool(pool=None,
                                     options=options.format(osd_mstore),
                                     args='',
                                     osd=osd_id,
                                     do_revive=False)
            # pull the updated mon db
            log.info('pull dir {0} -> {1}'.format(osd_mstore, local_mstore))
            local_mstore = tempfile.mkdtemp()
            teuthology.pull_directory(osd, osd_mstore, local_mstore)
            log.info('rm -rf osd:{0}'.format(osd_mstore))
            osd.run(args=['sudo', 'rm', '-fr', osd_mstore])
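
    # For reference (a sketch of what the manager call above wraps, not a
    # command taken from this file): on each OSD host the collection step
    # boils down to stopping the OSD and running something like
    #   ceph-objectstore-tool --data-path /var/lib/ceph/osd/<cluster>-<id> \
    #       --op update-mon-db --mon-store-path <mon-store dir>
    # which copies that OSD's view of the cluster maps into the mon store.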
    # recover the first_mon with the re-built mon db
    # push the recovered leveldb from the client node to that mon
    mon_store_dir = os.path.join('/var/lib/ceph/mon',
                                 '{0}-{1}'.format(cluster_name, mon_id))
    push_directory(local_mstore, mon, mon_store_dir)
    mon.run(args=['sudo', 'chown', '-R', 'ceph:ceph', mon_store_dir])
    shutil.rmtree(local_mstore)
    default_keyring = '/etc/ceph/{cluster}.keyring'.format(
        cluster=cluster_name)
    keyring_path = config.get('keyring_path', default_keyring)
    # fill up the caps in the keyring file
    mon.run(args=['sudo',
                  'ceph-authtool', keyring_path,
                  '-n', 'mon.',
                  '--cap', 'mon', 'allow *'])
    mon.run(args=['sudo',
                  'ceph-authtool', keyring_path,
                  '-n', 'client.admin',
                  '--cap', 'mon', 'allow *',
                  '--cap', 'osd', 'allow *',
                  '--cap', 'mds', 'allow *'])
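    # Why the caps are added here, as far as this task is concerned:
    # 'ceph-monstore-tool rebuild' seeds the recovered auth database from
    # this keyring, so the mon. and client.admin entries need their caps
    # spelled out or the rebuilt cluster would come up without usable
    # admin/mon credentials.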
    mon.run(args=['sudo', '-u', 'ceph',
                  'ceph-monstore-tool', mon_store_dir,
                  'rebuild', '--', '--keyring',
                  keyring_path])

    # revive monitors
    # the initial monmap is in the ceph.conf, so we are good.
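    # Explanatory note (an expansion of the comment above, not extra
    # behaviour from this file): the OSDs cannot supply a monmap, so the
    # task relies on the mon addresses already present in ceph.conf when the
    # remaining monitors are mkfs'd and started below.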
    n_mons = 0
    for remote, roles in mons.remotes.iteritems():
        is_mon = teuthology.is_type('mon')
        for role in roles:
            if not is_mon(role):
                continue
            cluster, _, m = teuthology.split_role(role)
            assert cluster_name == cluster
            if m != mon_id:
                log.info('running mkfs on {cluster}:mon.{mon}'.format(
                    cluster=cluster,
                    mon=m))
                remote.run(
                    args=[
                        'sudo',
                        'ceph-mon',
                        '--cluster', cluster,
                        '--mkfs',
                        '-i', m,
                        '--keyring', keyring_path])
            manager.revive_mon(m)
            n_mons += 1

    manager.wait_for_mon_quorum_size(n_mons, timeout=30)
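
    # With the monitors back in quorum, the remaining step is to restart the
    # OSDs so they rejoin the recovered cluster; that is all the loop below
    # does.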
    for osd, roles in osds.remotes.iteritems():
        is_osd = teuthology.is_type('osd')
        for role in roles:
            if not is_osd(role):
                continue
            _, _, osd_id = teuthology.split_role(role)
            log.info('reviving osd.{0}'.format(osd_id))
            manager.revive_osd(osd_id)