"""
Special regression test for tracker #11184

Synopsis: osd/SnapMapper.cc: 282: FAILED assert(check(oid))

This is accomplished by moving a pg that wasn't part of split and still
includes divergent priors.
"""
import logging
import os
import time
from cStringIO import StringIO  # Python 2 file; matches iterkeys() usage below

from teuthology.orchestra import run
from teuthology import misc as teuthology

from util.rados import rados

log = logging.getLogger(__name__)
def task(ctx, config):
    """
    Test handling of divergent entries during export / import
    to regression test tracker #11184.

    The assert is reproduced by exporting a pg that was not part of a
    pg split (and that still carries divergent priors) from one OSD with
    ceph-objectstore-tool and importing it into another OSD.

    Requires 3 osds on a single test node.

    :param ctx: teuthology run context (cluster, managers, remotes)
    :param config: must be a dict (or None); no options are consumed
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'divergent_priors task only accepts a dict for configuration'

    manager = ctx.managers['ceph']

    # Wait for the three OSDs this test requires to report up.
    while len(manager.get_osd_status()['up']) < 3:
        time.sleep(10)

    osds = [0, 1, 2]
    manager.flush_pg_stats(osds)
    # Pin cluster membership so the kill/revive dance below does not
    # trigger automatic rebalancing.
    manager.raw_cluster_cmd('osd', 'set', 'noout')
    manager.raw_cluster_cmd('osd', 'set', 'noin')
    manager.raw_cluster_cmd('osd', 'set', 'nodown')
    manager.wait_for_clean()

    # something that is always there
    dummyfile = '/etc/fstab'
    dummyfile2 = '/etc/resolv.conf'
    testdir = teuthology.get_testdir(ctx)

    # create a 1-pg pool; pg 1.0 (and later 2.0 after split) is the subject
    log.info('creating foo')
    manager.raw_cluster_cmd('osd', 'pool', 'create', 'foo', '1')
    manager.raw_cluster_cmd(
        'osd', 'pool', 'application', 'enable',
        'foo', 'rados', run.Raw('||'), 'true')

    # Remove extra pool to simplify log output
    manager.raw_cluster_cmd('osd', 'pool', 'delete', 'rbd', 'rbd',
                            '--yes-i-really-really-mean-it')

    # Keep pg logs short so divergence is produced with only a few writes.
    for i in osds:
        manager.set_config(i, osd_min_pg_log_entries=10)
        manager.set_config(i, osd_max_pg_log_entries=10)
        manager.set_config(i, osd_pg_log_trim_min=5)

    # determine the primary; it is the OSD that will hold the divergent log
    divergent = manager.get_pg_primary('foo', 0)
    log.info("primary and soon to be divergent is %d", divergent)
    non_divergent = list(osds)
    non_divergent.remove(divergent)

    log.info('writing initial objects')
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
    # write 100 objects -- NOTE(review): object count reconstructed from a
    # garbled source; confirm against the original file
    for i in range(100):
        rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, dummyfile])

    manager.wait_for_clean()

    # blackhole non_divergent: their objectstores drop all writes, so only
    # the primary's pg log advances for the ops below
    log.info("blackholing osds %s", str(non_divergent))
    for i in non_divergent:
        manager.set_config(i, objectstore_blackhole=1)

    # NOTE(review): values reconstructed -- confirm against original
    DIVERGENT_WRITE = 5
    DIVERGENT_REMOVE = 5

    # Write some soon to be divergent
    log.info('writing divergent objects')
    for i in range(DIVERGENT_WRITE):
        rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i,
                         dummyfile2], wait=False)
    # Remove some soon to be divergent
    log.info('remove divergent objects')
    for i in range(DIVERGENT_REMOVE):
        rados(ctx, mon, ['-p', 'foo', 'rm',
                         'existing_%d' % (i + DIVERGENT_WRITE)], wait=False)
    # the async rados ops above can never complete (replicas blackholed);
    # give them time to hit the primary, then kill the clients
    time.sleep(10)
    mon.run(
        args=['killall', '-9', 'rados'],
        wait=True,
        check_status=False)

    # kill all the osds but leave divergent in
    log.info('killing all the osds')
    for i in osds:
        manager.kill_osd(i)
    for i in osds:
        manager.mark_down_osd(i)
    for i in non_divergent:
        manager.mark_out_osd(i)

    # bring up non-divergent
    log.info("bringing up non_divergent %s", str(non_divergent))
    for i in non_divergent:
        manager.revive_osd(i)
    for i in non_divergent:
        manager.mark_in_osd(i)

    # write 1 non-divergent object (ensure that old divergent one is divergent)
    objname = "existing_%d" % (DIVERGENT_WRITE + DIVERGENT_REMOVE)
    log.info('writing non-divergent object ' + objname)
    rados(ctx, mon, ['-p', 'foo', 'put', objname, dummyfile2])

    manager.wait_for_recovery()

    # ensure no recovery of up osds first
    log.info('delay recovery')
    for i in non_divergent:
        manager.wait_run_admin_socket(
            'osd', i, ['set_recovery_delay', '100000'])

    # bring in our divergent friend
    log.info("revive divergent %d", divergent)
    manager.raw_cluster_cmd('osd', 'set', 'noup')
    manager.revive_osd(divergent)

    log.info('delay recovery divergent')
    manager.wait_run_admin_socket(
        'osd', divergent, ['set_recovery_delay', '100000'])

    manager.raw_cluster_cmd('osd', 'unset', 'noup')
    while len(manager.get_osd_status()['up']) < 3:
        time.sleep(10)

    log.info('wait for peering')
    rados(ctx, mon, ['-p', 'foo', 'put', 'foo', dummyfile])

    # At this point the divergent_priors should have been detected

    log.info("killing divergent %d", divergent)
    manager.kill_osd(divergent)

    # Split pgs for pool foo; the exported pg (2.0) was not part of the split
    manager.raw_cluster_cmd('osd', 'pool', 'set', 'foo', 'pg_num', '2')
    time.sleep(5)

    manager.raw_cluster_cmd('pg', 'dump')

    # Export the divergent pg with ceph-objectstore-tool
    (exp_remote,) = ctx.\
        cluster.only('osd.{o}'.format(o=divergent)).remotes.iterkeys()
    # get_filepath() returns a path containing a literal '{id}' placeholder,
    # filled in by the per-command .format(id=...) calls below
    FSPATH = manager.get_filepath()
    JPATH = os.path.join(FSPATH, "journal")
    prefix = ("sudo adjust-ulimits ceph-objectstore-tool "
              "--data-path {fpath} --journal-path {jpath} "
              "--log-file="
              "/var/log/ceph/objectstore_tool.$$.log ".
              format(fpath=FSPATH, jpath=JPATH))
    pid = os.getpid()
    expfile = os.path.join(testdir, "exp.{pid}.out".format(pid=pid))
    cmd = ((prefix + "--op export --pgid 2.0 --file {file}").
           format(id=divergent, file=expfile))
    proc = exp_remote.run(args=cmd, wait=True,
                          check_status=False, stdout=StringIO())
    assert proc.exitstatus == 0

    # Remove the same pg that was exported
    cmd = ((prefix + "--op remove --pgid 2.0").
           format(id=divergent))
    proc = exp_remote.run(args=cmd, wait=True,
                          check_status=False, stdout=StringIO())
    assert proc.exitstatus == 0

    # Kill one of non-divergent OSDs
    log.info('killing osd.%d' % non_divergent[0])
    manager.kill_osd(non_divergent[0])
    manager.mark_down_osd(non_divergent[0])
    # manager.mark_out_osd(non_divergent[0])

    # An empty collection for pg 2.0 might need to be cleaned up
    # (best-effort: no exit-status assert -- the collection may not exist)
    cmd = ((prefix + "--op remove --pgid 2.0").
           format(id=non_divergent[0]))
    proc = exp_remote.run(args=cmd, wait=True,
                          check_status=False, stdout=StringIO())

    # Import the exported pg into the non-divergent OSD (same node, so the
    # divergent OSD's remote can reach its data path)
    cmd = ((prefix + "--op import --file {file}").
           format(id=non_divergent[0], file=expfile))
    proc = exp_remote.run(args=cmd, wait=True,
                          check_status=False, stdout=StringIO())
    assert proc.exitstatus == 0

    # bring in our divergent friend and other node
    log.info("revive divergent %d", divergent)
    manager.revive_osd(divergent)
    manager.mark_in_osd(divergent)
    log.info("revive %d", non_divergent[0])
    manager.revive_osd(non_divergent[0])

    while len(manager.get_osd_status()['up']) < 3:
        time.sleep(10)

    log.info('delay recovery divergent')
    manager.set_config(divergent, osd_recovery_delay_start=100000)
    log.info('mark divergent in')
    manager.mark_in_osd(divergent)

    log.info('wait for peering')
    rados(ctx, mon, ['-p', 'foo', 'put', 'foo', dummyfile])

    log.info("killing divergent %d", divergent)
    manager.kill_osd(divergent)
    log.info("reviving divergent %d", divergent)
    manager.revive_osd(divergent)
    time.sleep(20)

    log.info('allowing recovery')
    # Set osd_recovery_delay_start back to 0 and kick the queue
    for i in osds:
        manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'debug',
                                'kick_recovery_wq', ' 0')

    log.info('reading divergent objects')
    for i in range(DIVERGENT_WRITE + DIVERGENT_REMOVE):
        exit_status = rados(ctx, mon, ['-p', 'foo', 'get', 'existing_%d' % i,
                                       '/tmp/existing'])
        # was 'exit_status is 0': identity comparison with an int literal is
        # implementation-dependent -- use equality
        assert exit_status == 0

    # clean up the export file on the divergent OSD's node
    (remote,) = ctx.\
        cluster.only('osd.{o}'.format(o=divergent)).remotes.iterkeys()
    cmd = 'rm {file}'.format(file=expfile)
    remote.run(args=cmd, wait=True)

    log.info("success")
=True)