# Source: git.proxmox.com Git - ceph.git/blob - ceph/qa/tasks/divergent_priors2.py
"""
Special case divergence test with ceph-objectstore-tool export/remove/import
"""
import logging
import os
import time
from cStringIO import StringIO

from teuthology import misc as teuthology
from util.rados import rados
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
def task(ctx, config):
    """
    Test handling of divergent entries with prior_version
    prior to log_tail and a ceph-objectstore-tool export/import

    Requires 3 osds on a single test node.

    The test writes objects to a one-PG pool, blackholes the two
    non-primary osds, issues writes/removes that only the primary logs,
    restarts the cluster without the primary, and then exports, removes
    and re-imports the pool's PG on the (stopped) primary with
    ceph-objectstore-tool before letting recovery run and reading the
    objects back.

    :param ctx: test context; ``ctx.managers['ceph']`` and ``ctx.cluster``
                are used.
    :param config: task configuration, must be a dict.
    """
    # NOTE(review): this function was restored from a lossy copy of the
    # file.  The following were not present in that copy and were filled
    # in from the gaps it left -- confirm each against the upstream file:
    # the ``config is None`` default, all ``time.sleep`` durations, the
    # initial object count (100), DIVERGENT_WRITE / DIVERGENT_REMOVE (5),
    # the "--log-file=" fragment of the tool prefix, the ``pid`` source and
    # the '/tmp/existing' output path of the final reads.
    if config is None:
        config = {}
    assert isinstance(config, dict), \
        'divergent_priors task only accepts a dict for configuration'

    manager = ctx.managers['ceph']

    # wait until all three osds report up before touching the cluster
    while len(manager.get_osd_status()['up']) < 3:
        time.sleep(10)
    manager.flush_pg_stats([0, 1, 2])
    manager.raw_cluster_cmd('osd', 'set', 'noout')
    manager.raw_cluster_cmd('osd', 'set', 'noin')
    manager.raw_cluster_cmd('osd', 'set', 'nodown')
    manager.wait_for_clean()

    # something that is always there
    dummyfile = '/etc/fstab'
    dummyfile2 = '/etc/resolv.conf'
    testdir = teuthology.get_testdir(ctx)

    # create 1 pg pool
    log.info('creating foo')
    manager.raw_cluster_cmd('osd', 'pool', 'create', 'foo', '1')

    osds = [0, 1, 2]
    for i in osds:
        manager.set_config(i, osd_min_pg_log_entries=10)
        manager.set_config(i, osd_max_pg_log_entries=10)
        manager.set_config(i, osd_pg_log_trim_min=5)

    # determine primary
    divergent = manager.get_pg_primary('foo', 0)
    log.info("primary and soon to be divergent is %d", divergent)
    non_divergent = list(osds)
    non_divergent.remove(divergent)

    log.info('writing initial objects')
    first_mon = teuthology.get_first_mon(ctx, config)
    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
    # write the initial objects
    for i in range(100):
        rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, dummyfile])

    manager.wait_for_clean()

    # blackhole non_divergent
    log.info("blackholing osds %s", str(non_divergent))
    for i in non_divergent:
        manager.set_config(i, objectstore_blackhole=1)

    DIVERGENT_WRITE = 5
    DIVERGENT_REMOVE = 5
    # Write some soon to be divergent
    log.info('writing divergent objects')
    for i in range(DIVERGENT_WRITE):
        rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i,
                         dummyfile2], wait=False)
    # Remove some soon to be divergent
    log.info('remove divergent objects')
    for i in range(DIVERGENT_REMOVE):
        rados(ctx, mon, ['-p', 'foo', 'rm',
                         'existing_%d' % (i + DIVERGENT_WRITE)], wait=False)
    time.sleep(10)
    # the rados clients above were started with wait=False; kill them
    mon.run(
        args=['killall', '-9', 'rados'],
        wait=True,
        check_status=False)

    # kill all the osds but leave divergent in
    log.info('killing all the osds')
    for i in osds:
        manager.kill_osd(i)
    for i in osds:
        manager.mark_down_osd(i)
    for i in non_divergent:
        manager.mark_out_osd(i)

    # bring up non-divergent
    log.info("bringing up non_divergent %s", str(non_divergent))
    for i in non_divergent:
        manager.revive_osd(i)
    for i in non_divergent:
        manager.mark_in_osd(i)

    # write 1 non-divergent object (ensure that old divergent one is divergent)
    objname = "existing_%d" % (DIVERGENT_WRITE + DIVERGENT_REMOVE)
    log.info('writing non-divergent object ' + objname)
    rados(ctx, mon, ['-p', 'foo', 'put', objname, dummyfile2])

    manager.wait_for_recovery()

    # ensure no recovery of up osds first
    log.info('delay recovery')
    for i in non_divergent:
        manager.wait_run_admin_socket(
            'osd', i, ['set_recovery_delay', '100000'])

    # bring in our divergent friend
    log.info("revive divergent %d", divergent)
    manager.raw_cluster_cmd('osd', 'set', 'noup')
    manager.revive_osd(divergent)

    log.info('delay recovery divergent')
    manager.wait_run_admin_socket(
        'osd', divergent, ['set_recovery_delay', '100000'])

    manager.raw_cluster_cmd('osd', 'unset', 'noup')
    while len(manager.get_osd_status()['up']) < 3:
        time.sleep(10)

    log.info('wait for peering')
    rados(ctx, mon, ['-p', 'foo', 'put', 'foo', dummyfile])

    # At this point the divergent_priors should have been detected

    log.info("killing divergent %d", divergent)
    manager.kill_osd(divergent)

    # Export a pg
    (exp_remote,) = ctx.\
        cluster.only('osd.{o}'.format(o=divergent)).remotes.iterkeys()
    FSPATH = manager.get_filepath()
    JPATH = os.path.join(FSPATH, "journal")
    prefix = ("sudo adjust-ulimits ceph-objectstore-tool "
              "--data-path {fpath} --journal-path {jpath} "
              "--log-file="
              "/var/log/ceph/objectstore_tool.$$.log ".
              format(fpath=FSPATH, jpath=JPATH))
    pid = os.getpid()
    expfile = os.path.join(testdir, "exp.{pid}.out".format(pid=pid))

    def run_objectstore_tool(op_args):
        # Run one ceph-objectstore-tool operation on the divergent osd's
        # host and require a zero exit status.  The same ``id``/``file``
        # format arguments are supplied for every operation, exactly as
        # the three original call sites did.
        cmd = (prefix + op_args).format(id=divergent, file=expfile)
        proc = exp_remote.run(args=cmd, wait=True,
                              check_status=False, stdout=StringIO())
        assert proc.exitstatus == 0

    run_objectstore_tool("--op export --pgid 2.0 --file {file}")
    run_objectstore_tool("--op remove --pgid 2.0")
    run_objectstore_tool("--op import --file {file}")

    log.info("reviving divergent %d", divergent)
    manager.revive_osd(divergent)
    manager.wait_run_admin_socket('osd', divergent, ['dump_ops_in_flight'])
    time.sleep(20)

    log.info('allowing recovery')
    # Set osd_recovery_delay_start back to 0 and kick the queue
    for i in osds:
        manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'debug',
                                'kick_recovery_wq', ' 0')

    log.info('reading divergent objects')
    for i in range(DIVERGENT_WRITE + DIVERGENT_REMOVE):
        exit_status = rados(ctx, mon, ['-p', 'foo', 'get', 'existing_%d' % i,
                                       '/tmp/existing'])
        # compare by value: ``is 0`` only works by accident of int caching
        assert exit_status == 0

    # clean up the exported pg file
    cmd = 'rm {file}'.format(file=expfile)
    exp_remote.run(args=cmd, wait=True)