]>
git.proxmox.com Git - ceph.git/blob - ceph/qa/tasks/reg11184.py
2 Special regression test for tracker #11184
4 Synopsis: osd/SnapMapper.cc: 282: FAILED assert(check(oid))
6 This is accomplished by moving a pg that wasn't part of split and still include
11 from cStringIO
import StringIO
13 from teuthology
import misc
as teuthology
14 from util
.rados
import rados
18 log
= logging
.getLogger(__name__
)
21 def task(ctx
, config
):
23 Test handling of divergent entries during export / import
24 to regression test tracker #11184
32 Requires 3 osds on a single test node.
36 assert isinstance(config
, dict), \
37 'divergent_priors task only accepts a dict for configuration'
39 manager
= ctx
.managers
['ceph']
41 while len(manager
.get_osd_status()['up']) < 3:
44 manager
.flush_pg_stats(osds
)
45 manager
.raw_cluster_cmd('osd', 'set', 'noout')
46 manager
.raw_cluster_cmd('osd', 'set', 'noin')
47 manager
.raw_cluster_cmd('osd', 'set', 'nodown')
48 manager
.wait_for_clean()
50 # something that is always there
51 dummyfile
= '/etc/fstab'
52 dummyfile2
= '/etc/resolv.conf'
53 testdir
= teuthology
.get_testdir(ctx
)
56 log
.info('creating foo')
57 manager
.raw_cluster_cmd('osd', 'pool', 'create', 'foo', '1')
59 # Remove extra pool to simlify log output
60 manager
.raw_cluster_cmd('osd', 'pool', 'delete', 'rbd', 'rbd', '--yes-i-really-really-mean-it')
63 manager
.set_config(i
, osd_min_pg_log_entries
=10)
64 manager
.set_config(i
, osd_max_pg_log_entries
=10)
65 manager
.set_config(i
, osd_pg_log_trim_min
=5)
68 divergent
= manager
.get_pg_primary('foo', 0)
69 log
.info("primary and soon to be divergent is %d", divergent
)
70 non_divergent
= list(osds
)
71 non_divergent
.remove(divergent
)
73 log
.info('writing initial objects')
74 first_mon
= teuthology
.get_first_mon(ctx
, config
)
75 (mon
,) = ctx
.cluster
.only(first_mon
).remotes
.iterkeys()
78 rados(ctx
, mon
, ['-p', 'foo', 'put', 'existing_%d' % i
, dummyfile
])
80 manager
.wait_for_clean()
82 # blackhole non_divergent
83 log
.info("blackholing osds %s", str(non_divergent
))
84 for i
in non_divergent
:
85 manager
.set_config(i
, objectstore_blackhole
=1)
89 # Write some soon to be divergent
90 log
.info('writing divergent objects')
91 for i
in range(DIVERGENT_WRITE
):
92 rados(ctx
, mon
, ['-p', 'foo', 'put', 'existing_%d' % i
,
93 dummyfile2
], wait
=False)
94 # Remove some soon to be divergent
95 log
.info('remove divergent objects')
96 for i
in range(DIVERGENT_REMOVE
):
97 rados(ctx
, mon
, ['-p', 'foo', 'rm',
98 'existing_%d' % (i
+ DIVERGENT_WRITE
)], wait
=False)
101 args
=['killall', '-9', 'rados'],
105 # kill all the osds but leave divergent in
106 log
.info('killing all the osds')
110 manager
.mark_down_osd(i
)
111 for i
in non_divergent
:
112 manager
.mark_out_osd(i
)
114 # bring up non-divergent
115 log
.info("bringing up non_divergent %s", str(non_divergent
))
116 for i
in non_divergent
:
117 manager
.revive_osd(i
)
118 for i
in non_divergent
:
119 manager
.mark_in_osd(i
)
121 # write 1 non-divergent object (ensure that old divergent one is divergent)
122 objname
= "existing_%d" % (DIVERGENT_WRITE
+ DIVERGENT_REMOVE
)
123 log
.info('writing non-divergent object ' + objname
)
124 rados(ctx
, mon
, ['-p', 'foo', 'put', objname
, dummyfile2
])
126 manager
.wait_for_recovery()
128 # ensure no recovery of up osds first
129 log
.info('delay recovery')
130 for i
in non_divergent
:
131 manager
.wait_run_admin_socket(
132 'osd', i
, ['set_recovery_delay', '100000'])
134 # bring in our divergent friend
135 log
.info("revive divergent %d", divergent
)
136 manager
.raw_cluster_cmd('osd', 'set', 'noup')
137 manager
.revive_osd(divergent
)
139 log
.info('delay recovery divergent')
140 manager
.wait_run_admin_socket(
141 'osd', divergent
, ['set_recovery_delay', '100000'])
143 manager
.raw_cluster_cmd('osd', 'unset', 'noup')
144 while len(manager
.get_osd_status()['up']) < 3:
147 log
.info('wait for peering')
148 rados(ctx
, mon
, ['-p', 'foo', 'put', 'foo', dummyfile
])
150 # At this point the divergent_priors should have been detected
152 log
.info("killing divergent %d", divergent
)
153 manager
.kill_osd(divergent
)
155 # Split pgs for pool foo
156 manager
.raw_cluster_cmd('osd', 'pool', 'set', 'foo', 'pg_num', '2')
159 manager
.raw_cluster_cmd('pg','dump')
162 (exp_remote
,) = ctx
.\
163 cluster
.only('osd.{o}'.format(o
=divergent
)).remotes
.iterkeys()
164 FSPATH
= manager
.get_filepath()
165 JPATH
= os
.path
.join(FSPATH
, "journal")
166 prefix
= ("sudo adjust-ulimits ceph-objectstore-tool "
167 "--data-path {fpath} --journal-path {jpath} "
169 "/var/log/ceph/objectstore_tool.$$.log ".
170 format(fpath
=FSPATH
, jpath
=JPATH
))
172 expfile
= os
.path
.join(testdir
, "exp.{pid}.out".format(pid
=pid
))
173 cmd
= ((prefix
+ "--op export --pgid 2.0 --file {file}").
174 format(id=divergent
, file=expfile
))
175 proc
= exp_remote
.run(args
=cmd
, wait
=True,
176 check_status
=False, stdout
=StringIO())
177 assert proc
.exitstatus
== 0
179 # Remove the same pg that was exported
180 cmd
= ((prefix
+ "--op remove --pgid 2.0").
181 format(id=divergent
))
182 proc
= exp_remote
.run(args
=cmd
, wait
=True,
183 check_status
=False, stdout
=StringIO())
184 assert proc
.exitstatus
== 0
186 # Kill one of non-divergent OSDs
187 log
.info('killing osd.%d' % non_divergent
[0])
188 manager
.kill_osd(non_divergent
[0])
189 manager
.mark_down_osd(non_divergent
[0])
190 # manager.mark_out_osd(non_divergent[0])
192 # An empty collection for pg 2.0 needs to be cleaned up
193 cmd
= ((prefix
+ "--op remove --pgid 2.0").
194 format(id=non_divergent
[0]))
195 proc
= exp_remote
.run(args
=cmd
, wait
=True,
196 check_status
=False, stdout
=StringIO())
197 assert proc
.exitstatus
== 0
199 cmd
= ((prefix
+ "--op import --file {file}").
200 format(id=non_divergent
[0], file=expfile
))
201 proc
= exp_remote
.run(args
=cmd
, wait
=True,
202 check_status
=False, stdout
=StringIO())
203 assert proc
.exitstatus
== 0
205 # bring in our divergent friend and other node
206 log
.info("revive divergent %d", divergent
)
207 manager
.revive_osd(divergent
)
208 manager
.mark_in_osd(divergent
)
209 log
.info("revive %d", non_divergent
[0])
210 manager
.revive_osd(non_divergent
[0])
212 while len(manager
.get_osd_status()['up']) < 3:
215 log
.info('delay recovery divergent')
216 manager
.set_config(divergent
, osd_recovery_delay_start
=100000)
217 log
.info('mark divergent in')
218 manager
.mark_in_osd(divergent
)
220 log
.info('wait for peering')
221 rados(ctx
, mon
, ['-p', 'foo', 'put', 'foo', dummyfile
])
223 log
.info("killing divergent %d", divergent
)
224 manager
.kill_osd(divergent
)
225 log
.info("reviving divergent %d", divergent
)
226 manager
.revive_osd(divergent
)
229 log
.info('allowing recovery')
230 # Set osd_recovery_delay_start back to 0 and kick the queue
232 manager
.raw_cluster_cmd('tell', 'osd.%d' % i
, 'debug',
233 'kick_recovery_wq', ' 0')
235 log
.info('reading divergent objects')
236 for i
in range(DIVERGENT_WRITE
+ DIVERGENT_REMOVE
):
237 exit_status
= rados(ctx
, mon
, ['-p', 'foo', 'get', 'existing_%d' % i
,
239 assert exit_status
is 0
242 cluster
.only('osd.{o}'.format(o
=divergent
)).remotes
.iterkeys()
243 cmd
= 'rm {file}'.format(file=expfile
)
244 remote
.run(args
=cmd
, wait
=True)