# git.proxmox.com Git - ceph.git/blob - ceph/qa/tasks/reg11184.py
# 03db1b0641e9d67a6ff727349e77ec8911f903af
"""
Special regression test for tracker #11184

Synopsis: osd/SnapMapper.cc: 282: FAILED assert(check(oid))

This is accomplished by moving a pg that wasn't part of the split and still
includes ... (the rest of this sentence is missing from the extracted text)
"""
import logging
import os
import time
from cStringIO import StringIO

from teuthology import misc as teuthology
from util.rados import rados
# Module-level logger, named after this module, used by every step of the task.
log = logging.getLogger(__name__)
def _wait_for_up_osds(manager, count, interval=10):
    """Poll *manager* until at least *count* OSDs are reported as up."""
    while len(manager.get_osd_status()['up']) < count:
        # NOTE(review): the loop body is missing from the extracted source;
        # sleeping between polls is assumed -- confirm the interval.
        time.sleep(interval)


def _run_checked(remote, cmd):
    """Run *cmd* on *remote* and assert that it exited with status 0."""
    proc = remote.run(args=cmd, wait=True,
                      check_status=False, stdout=StringIO())
    assert proc.exitstatus == 0


def task(ctx, config):
    """
    Test handling of divergent entries during export / import
    to regression test tracker #11184

    Requires 3 osds on a single test node.

    NOTE(review): this function was rebuilt from a damaged extraction of the
    file. Statements that are not visible in that extraction are marked with
    NOTE(review) comments below and must be confirmed against the upstream
    source before this test is trusted.

    :param ctx: teuthology run context; ``ctx.managers['ceph']`` and
                ``ctx.cluster`` are used.
    :param config: task configuration dict (``None`` is treated as empty).
    """
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'divergent_priors task only accepts a dict for configuration'

    # NOTE(review): none of the following definitions are visible in the
    # extracted source. `osds` mirrors the literal [0, 1, 2] that the source
    # passes to flush_pg_stats; the numeric values are placeholders.
    osds = [0, 1, 2]
    DIVERGENT_WRITE = 5
    DIVERGENT_REMOVE = 5
    initial_objects = 100   # must be >= DIVERGENT_WRITE + DIVERGENT_REMOVE
    pid = os.getpid()       # only used to make the export file name unique

    manager = ctx.managers['ceph']

    _wait_for_up_osds(manager, 3)
    manager.flush_pg_stats(list(osds))
    manager.raw_cluster_cmd('osd', 'set', 'noout')
    manager.raw_cluster_cmd('osd', 'set', 'noin')
    manager.raw_cluster_cmd('osd', 'set', 'nodown')
    manager.wait_for_clean()

    # something that is always there
    dummyfile = '/etc/fstab'
    dummyfile2 = '/etc/resolv.conf'
    testdir = teuthology.get_testdir(ctx)

    log.info('creating foo')
    manager.raw_cluster_cmd('osd', 'pool', 'create', 'foo', '1')

    # NOTE(review): the loop header is missing from the extracted source;
    # iterating over all osds is assumed.
    for i in osds:
        manager.set_config(i, osd_min_pg_log_entries=10)
        manager.set_config(i, osd_max_pg_log_entries=10)
        manager.set_config(i, osd_pg_log_trim_min=5)

    divergent = manager.get_pg_primary('foo', 0)
    log.info("primary and soon to be divergent is %d", divergent)
    non_divergent = list(osds)
    non_divergent.remove(divergent)

    log.info('writing initial objects')
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.keys()
    # NOTE(review): the loop header is missing from the extracted source;
    # the object count is a placeholder (see initial_objects above).
    for i in range(initial_objects):
        rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, dummyfile])

    manager.wait_for_clean()

    # blackhole non_divergent
    log.info("blackholing osds %s", str(non_divergent))
    for i in non_divergent:
        manager.set_config(i, objectstore_blackhole=1)

    # Write some soon to be divergent
    log.info('writing divergent objects')
    for i in range(DIVERGENT_WRITE):
        rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i,
                         dummyfile2], wait=False)
    # Remove some soon to be divergent
    log.info('remove divergent objects')
    for i in range(DIVERGENT_REMOVE):
        rados(ctx, mon, ['-p', 'foo', 'rm',
                         'existing_%d' % (i + DIVERGENT_WRITE)], wait=False)
    # NOTE(review): only the `args=[...]` line of the call below is visible in
    # the extracted source. The pause, the receiver (`mon`) and the remaining
    # keyword arguments are assumptions -- confirm.
    time.sleep(10)
    mon.run(
        args=['killall', '-9', 'rados'],
        wait=True,
        check_status=False)

    # kill all the osds but leave divergent in
    log.info('killing all the osds')
    # NOTE(review): the kill loop and the header of the mark_down loop are
    # missing from the extracted source; both are assumed to cover all osds.
    for i in osds:
        manager.kill_osd(i)
    for i in osds:
        manager.mark_down_osd(i)
    for i in non_divergent:
        manager.mark_out_osd(i)

    # bring up non-divergent
    log.info("bringing up non_divergent %s", str(non_divergent))
    for i in non_divergent:
        manager.revive_osd(i)
    for i in non_divergent:
        manager.mark_in_osd(i)

    # write 1 non-divergent object (ensure that old divergent one is divergent)
    objname = "existing_%d" % (DIVERGENT_WRITE + DIVERGENT_REMOVE)
    log.info('writing non-divergent object %s', objname)
    rados(ctx, mon, ['-p', 'foo', 'put', objname, dummyfile2])

    manager.wait_for_recovery()

    # ensure no recovery of up osds first
    log.info('delay recovery')
    for i in non_divergent:
        manager.wait_run_admin_socket(
            'osd', i, ['set_recovery_delay', '100000'])

    # bring in our divergent friend
    log.info("revive divergent %d", divergent)
    manager.raw_cluster_cmd('osd', 'set', 'noup')
    manager.revive_osd(divergent)

    log.info('delay recovery divergent')
    manager.wait_run_admin_socket(
        'osd', divergent, ['set_recovery_delay', '100000'])

    manager.raw_cluster_cmd('osd', 'unset', 'noup')
    _wait_for_up_osds(manager, 3)

    log.info('wait for peering')
    rados(ctx, mon, ['-p', 'foo', 'put', 'foo', dummyfile])

    # At this point the divergent_priors should have been detected

    log.info("killing divergent %d", divergent)
    manager.kill_osd(divergent)

    # Split pgs for pool foo
    manager.raw_cluster_cmd('osd', 'pool', 'set', 'foo', 'pg_num', '2')

    (exp_remote,) = ctx.\
        cluster.only('osd.{o}'.format(o=divergent)).remotes.keys()
    FSPATH = manager.get_filepath()
    JPATH = os.path.join(FSPATH, "journal")
    # NOTE(review): the "--log-file=" fragment is missing from the extracted
    # source and is inferred from the log path that follows it -- confirm.
    # The commands built from `prefix` are formatted again with `id=`, so
    # FSPATH presumably carries an "{id}" placeholder -- confirm.
    prefix = ("sudo adjust-ulimits ceph-objectstore-tool "
              "--data-path {fpath} --journal-path {jpath} "
              "--log-file="
              "/var/log/ceph/objectstore_tool.$$.log ".
              format(fpath=FSPATH, jpath=JPATH))
    expfile = os.path.join(testdir, "exp.{pid}.out".format(pid=pid))
    cmd = ((prefix + "--op export --pgid 1.0 --file {file}").
           format(id=divergent, file=expfile))
    _run_checked(exp_remote, cmd)

    # Remove the same pg that was exported
    cmd = ((prefix + "--op remove --pgid 1.0").
           format(id=divergent, file=expfile))
    _run_checked(exp_remote, cmd)

    # Kill one of non-divergent OSDs
    log.info('killing osd.%d', non_divergent[1])
    manager.kill_osd(non_divergent[1])
    manager.mark_down_osd(non_divergent[1])
    # manager.mark_out_osd(non_divergent[1])

    # The import runs on exp_remote (the divergent osd's remote) but targets
    # non_divergent[1], so both osds have to live on the same node.
    cmd = ((prefix + "--op import --file {file}").
           format(id=non_divergent[1], file=expfile))
    _run_checked(exp_remote, cmd)

    # bring in our divergent friend and other node
    log.info("revive divergent %d", divergent)
    manager.revive_osd(divergent)
    manager.mark_in_osd(divergent)
    log.info("revive %d", non_divergent[1])
    manager.revive_osd(non_divergent[1])

    _wait_for_up_osds(manager, 3)

    log.info('delay recovery divergent')
    manager.set_config(divergent, osd_recovery_delay_start=100000)
    log.info('mark divergent in')
    manager.mark_in_osd(divergent)

    log.info('wait for peering')
    rados(ctx, mon, ['-p', 'foo', 'put', 'foo', dummyfile])

    log.info("killing divergent %d", divergent)
    manager.kill_osd(divergent)
    log.info("reviving divergent %d", divergent)
    manager.revive_osd(divergent)
    # NOTE(review): two original lines are missing from the extracted source
    # at this point; upstream may pause here before recovery is allowed.

    log.info('allowing recovery')
    # Set osd_recovery_delay_start back to 0 and kick the queue
    # NOTE(review): the loop header is missing from the extracted source;
    # iterating over all osds is assumed.
    for i in osds:
        manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'debug',
                                'kick_recovery_wq', ' 0')

    log.info('reading divergent objects')
    for i in range(DIVERGENT_WRITE + DIVERGENT_REMOVE):
        # NOTE(review): the output path of the `get` is missing from the
        # extracted source; '/tmp/existing' is a placeholder -- confirm.
        exit_status = rados(ctx, mon, ['-p', 'foo', 'get', 'existing_%d' % i,
                                       '/tmp/existing'])
        # Compare by value: `is 0` tests object identity, not equality.
        assert exit_status == 0

    # Clean up the export file on the divergent osd's remote.
    (remote,) = ctx.\
        cluster.only('osd.{o}'.format(o=divergent)).remotes.keys()
    cmd = 'rm {file}'.format(file=expfile)
    remote.run(args=cmd, wait=True)