]> git.proxmox.com Git - ceph.git/blob - ceph/qa/workunits/rgw/test_rgw_reshard.py
update ceph source to reef 18.2.1
[ceph.git] / ceph / qa / workunits / rgw / test_rgw_reshard.py
1 #!/usr/bin/python3
2
3 import errno
4 import time
5 import logging as log
6 import json
7 import os
8 from common import exec_cmd, boto_connect, create_user, put_objects, create_unlinked_objects
9
10 """
11 Rgw manual and dynamic resharding testing against a running instance
12 """
13 # The test cases in this file have been annotated for inventory.
14 # To extract the inventory (in csv format) use the command:
15 #
#   grep '^ *# TESTCASE' test_rgw_reshard.py | sed 's/^ *# TESTCASE //'
17 #
18 #
19
20 """ Constants """
# Identity of the RGW user that main() creates with create_user()
USER = 'tester'
DISPLAY_NAME = 'Testing'
# S3 credentials given to create_user() and then to boto_connect() in main()
ACCESS_KEY = 'NX5QOQKC6BH2IDN8HC7A'
SECRET_KEY = 'LnEsqNNqZIpkzauboDcLXLcYaWwLQ3Kop0zAnKIn'
# Bucket used by main() for the reshard add/process and olh pruning tests
BUCKET_NAME = 'a-bucket'
# Bucket on which main() enables versioning
VER_BUCKET_NAME = 'myver'
# Pool passed to `rados -p` when main() writes an omap entry into shard 0
INDEX_POOL = 'default.rgw.buckets.index'
28
class BucketStats:
    """Plain record of a bucket's name, id, usage counters and shard count."""

    def __init__(self, bucket_name, bucket_id, num_objs=0, size_kb=0, num_shards=0):
        """Store the given values; a non-positive num_shards is recorded as 1."""
        self.bucket_name = bucket_name
        self.bucket_id = bucket_id
        self.num_objs = num_objs
        self.size_kb = size_kb
        if num_shards > 0:
            self.num_shards = num_shards
        else:
            self.num_shards = 1

    def get_num_shards(self):
        """Refresh self.num_shards via get_bucket_num_shards(); returns nothing."""
        refreshed = get_bucket_num_shards(self.bucket_name, self.bucket_id)
        self.num_shards = refreshed
39
40
def get_bucket_stats(bucket_name):
    """
    function to get bucket stats

    Runs `radosgw-admin bucket stats` for bucket_name, parses the JSON
    output and returns a BucketStats built from the 'id', 'num_shards' and
    the 'rgw.main' usage counters.

    The object count and size are reported as 0 when the 'usage' map is
    empty or has no 'rgw.main' category, instead of raising KeyError for a
    bucket whose usage only lists other categories.
    """
    cmd = exec_cmd("radosgw-admin bucket stats --bucket {}".format(bucket_name))
    json_op = json.loads(cmd)
    bucket_id = json_op['id']
    num_shards = json_op['num_shards']
    # 'usage' may be empty, or may hold categories other than 'rgw.main'
    main_usage = json_op['usage'].get('rgw.main', {}) if json_op['usage'] else {}
    num_objects = main_usage.get('num_objects', 0)
    size_kb = main_usage.get('size_kb', 0)
    log.debug(" \nBUCKET_STATS: \nbucket: {} id: {} num_objects: {} size_kb: {} num_shards: {}\n".format(bucket_name, bucket_id,
              num_objects, size_kb, num_shards))
    return BucketStats(bucket_name, bucket_id, num_objects, size_kb, num_shards)
59
def get_bucket_layout(bucket_name):
    """Return the parsed JSON output of `radosgw-admin bucket layout` for bucket_name."""
    layout_cmd = "radosgw-admin bucket layout --bucket {}".format(bucket_name)
    raw_output = exec_cmd(layout_cmd)
    return json.loads(raw_output)
63
def get_bucket_shard0(bucket_name):
    """Return the name of the bucket's shard 0 object for its current index generation."""
    stats = get_bucket_stats(bucket_name)
    layout = get_bucket_layout(bucket_name)
    current_gen = layout['layout']['current_index']['gen']
    return '.dir.%s.%d.0' % (stats.bucket_id, current_gen)
68
def get_bucket_num_shards(bucket_name, bucket_id):
    """
    function to get bucket num shards

    Reads the 'bucket.instance:<name>:<id>' metadata entry with
    `radosgw-admin metadata get` and returns its bucket_info num_shards.
    """
    metadata_key = ':'.join(('bucket.instance', bucket_name, bucket_id))
    raw_output = exec_cmd('radosgw-admin metadata get {}'.format(metadata_key))
    bucket_info = json.loads(raw_output)['data']['bucket_info']
    return bucket_info['num_shards']
78
def run_bucket_reshard_cmd(bucket_name, num_shards, **kwargs):
    """
    Run `radosgw-admin bucket reshard` for bucket_name with num_shards.

    Recognised keyword arguments are consumed and turned into fault
    injection flags: 'error_at' (takes precedence over 'abort_at') and
    'error_code'. Whatever remains in kwargs is forwarded to exec_cmd(),
    whose result is returned unchanged.
    """
    pieces = [
        'radosgw-admin bucket reshard --bucket {} --num-shards {}'.format(bucket_name, num_shards),
        ' --rgw-reshard-bucket-lock-duration 30',  # reduce to minimum
    ]
    if 'error_at' in kwargs:
        pieces.append(' --inject-error-at {}'.format(kwargs.pop('error_at')))
    elif 'abort_at' in kwargs:
        pieces.append(' --inject-abort-at {}'.format(kwargs.pop('abort_at')))
    if 'error_code' in kwargs:
        pieces.append(' --inject-error-code {}'.format(kwargs.pop('error_code')))
    full_cmd = ''.join(pieces)
    return exec_cmd(full_cmd, **kwargs)
89
def test_bucket_reshard(conn, name, **fault):
    """
    Reshard a fresh bucket with a fault injected, then verify recovery.

    Creates bucket `name` through `conn`, fills it with 20 objects and runs
    run_bucket_reshard_cmd() targeting one more shard than the bucket has,
    forwarding `fault` (error_at / abort_at / error_code) to it.

    When error_code is ECANCELED the faulted command itself must return 0.
    Otherwise it must fail with something other than EBUSY, the shard count
    must be unchanged, the bucket must still accept a delete, the ACL grants
    must be unchanged, and a plain reshard is retried until it returns 0.

    In both cases the final shard count must equal the target and the grants
    must match the ones read right after bucket creation. The bucket and its
    remaining objects are always removed at the end.
    """
    # create a bucket with non-default ACLs to verify that reshard preserves them
    bucket = conn.create_bucket(Bucket=name, ACL='authenticated-read')
    grants = bucket.Acl().grants

    # objects still present in the bucket; used for cleanup in `finally`
    objs = []
    try:
        # create objs
        for i in range(0, 20):
            objs += [bucket.put_object(Key='key' + str(i), Body=b"some_data")]

        old_shard_count = get_bucket_stats(name).num_shards
        num_shards_expected = old_shard_count + 1

        # try reshard with fault injection
        # check_retcode=False so the return code can be inspected here
        _, ret = run_bucket_reshard_cmd(name, num_shards_expected, check_retcode=False, **fault)

        if fault.get('error_code') == errno.ECANCELED:
            assert(ret == 0) # expect ECANCELED to retry and succeed
        else:
            assert(ret != 0 and ret != errno.EBUSY)

            # check shard count
            cur_shard_count = get_bucket_stats(name).num_shards
            assert(cur_shard_count == old_shard_count)

            # verify that the bucket is writeable by deleting an object
            # (pop() also drops it from the cleanup list)
            objs.pop().delete()

            assert grants == bucket.Acl().grants # recheck grants after cancel

            # retry reshard without fault injection. if radosgw-admin aborted,
            # we'll have to retry until the reshard lock expires
            while True:
                _, ret = run_bucket_reshard_cmd(name, num_shards_expected, check_retcode=False)
                if ret == errno.EBUSY:
                    # matches the 30 second lock duration passed by run_bucket_reshard_cmd()
                    log.info('waiting 30 seconds for reshard lock to expire...')
                    time.sleep(30)
                    continue
                assert(ret == 0)
                break

        # recheck shard count
        final_shard_count = get_bucket_stats(name).num_shards
        assert(final_shard_count == num_shards_expected)

        assert grants == bucket.Acl().grants # recheck grants after commit
    finally:
        # cleanup on resharded bucket must succeed
        bucket.delete_objects(Delete={'Objects':[{'Key':o.key} for o in objs]})
        bucket.delete()
141
142
def main():
    """
    execute manual and dynamic resharding commands

    Creates the test user, a plain bucket and a versioned bucket, then runs
    in order: reshard add/list/process on an empty and a non-empty bucket,
    the fault-injection cases of test_bucket_reshard(), a reshard of the
    versioned bucket, an ACL comparison, pruning of a bogus olh entry by
    resharding, a PUT issued while a delayed reshard is running, and a
    bucket stats check after resharding with unlinked entries. Both buckets
    are deleted at the end.

    Failures surface as AssertionError, or as whatever exec_cmd() and the
    S3 calls raise.
    """
    create_user(USER, DISPLAY_NAME, ACCESS_KEY, SECRET_KEY)

    connection = boto_connect(ACCESS_KEY, SECRET_KEY)

    # create a bucket
    bucket = connection.create_bucket(Bucket=BUCKET_NAME)
    ver_bucket = connection.create_bucket(Bucket=VER_BUCKET_NAME)
    connection.BucketVersioning(VER_BUCKET_NAME).enable()

    # remember the ACL grants so they can be compared after resharding.
    # load() returns None, so comparing its results could never fail;
    # compare the grants themselves, as test_bucket_reshard() does
    bucket_acl = connection.BucketAcl(BUCKET_NAME).grants
    ver_bucket_acl = connection.BucketAcl(VER_BUCKET_NAME).grants

    # TESTCASE 'reshard-add','reshard','add','add bucket to resharding queue','succeeds'
    log.debug('TEST: reshard add\n')

    num_shards_expected = get_bucket_stats(BUCKET_NAME).num_shards + 1
    exec_cmd('radosgw-admin reshard add --bucket {} --num-shards {}'.format(BUCKET_NAME, num_shards_expected))
    cmd = exec_cmd('radosgw-admin reshard list')
    json_op = json.loads(cmd)
    log.debug('bucket name {}'.format(json_op[0]['bucket_name']))
    assert json_op[0]['bucket_name'] == BUCKET_NAME
    assert json_op[0]['tentative_new_num_shards'] == num_shards_expected

    # TESTCASE 'reshard-process','reshard','','process bucket resharding','succeeds'
    log.debug('TEST: reshard process\n')
    exec_cmd('radosgw-admin reshard process')
    time.sleep(5)
    # check bucket shards num
    # NOTE: a mismatch is only logged here, it does not fail the run
    bucket_stats1 = get_bucket_stats(BUCKET_NAME)
    if bucket_stats1.num_shards != num_shards_expected:
        log.error("Resharding failed on bucket {}. Expected number of shards are not created\n".format(BUCKET_NAME))

    # TESTCASE 'reshard-add','reshard','add','add non empty bucket to resharding queue','succeeds'
    log.debug('TEST: reshard add non empty bucket\n')
    # create objs
    num_objs = 8
    for i in range(0, num_objs):
        connection.Object(BUCKET_NAME, ('key'+str(i))).put(Body=b"some_data")

    num_shards_expected = get_bucket_stats(BUCKET_NAME).num_shards + 1
    exec_cmd('radosgw-admin reshard add --bucket {} --num-shards {}'.format(BUCKET_NAME, num_shards_expected))
    cmd = exec_cmd('radosgw-admin reshard list')
    json_op = json.loads(cmd)
    assert json_op[0]['bucket_name'] == BUCKET_NAME
    assert json_op[0]['tentative_new_num_shards'] == num_shards_expected

    # TESTCASE 'reshard-process','reshard','process','reshard non empty bucket','succeeds'
    log.debug('TEST: reshard process non empty bucket\n')
    exec_cmd('radosgw-admin reshard process')
    # check bucket shards num
    # NOTE: a mismatch is only logged here, it does not fail the run
    bucket_stats1 = get_bucket_stats(BUCKET_NAME)
    if bucket_stats1.num_shards != num_shards_expected:
        log.error("Resharding failed on bucket {}. Expected number of shards are not created\n".format(BUCKET_NAME))

    # TESTCASE 'manual bucket resharding','inject error','fail','check bucket accessibility', 'retry reshard'
    log.debug('TEST: reshard bucket with EIO injected at set_target_layout\n')
    test_bucket_reshard(connection, 'error-at-set-target-layout', error_at='set_target_layout')
    log.debug('TEST: reshard bucket with ECANCELED injected at set_target_layout\n')
    test_bucket_reshard(connection, 'error-at-set-target-layout', error_at='set_target_layout', error_code=errno.ECANCELED)
    log.debug('TEST: reshard bucket with abort at set_target_layout\n')
    test_bucket_reshard(connection, 'abort-at-set-target-layout', abort_at='set_target_layout')

    log.debug('TEST: reshard bucket with EIO injected at block_writes\n')
    test_bucket_reshard(connection, 'error-at-block-writes', error_at='block_writes')
    log.debug('TEST: reshard bucket with abort at block_writes\n')
    test_bucket_reshard(connection, 'abort-at-block-writes', abort_at='block_writes')

    log.debug('TEST: reshard bucket with EIO injected at commit_target_layout\n')
    test_bucket_reshard(connection, 'error-at-commit-target-layout', error_at='commit_target_layout')
    log.debug('TEST: reshard bucket with ECANCELED injected at commit_target_layout\n')
    test_bucket_reshard(connection, 'error-at-commit-target-layout', error_at='commit_target_layout', error_code=errno.ECANCELED)
    log.debug('TEST: reshard bucket with abort at commit_target_layout\n')
    test_bucket_reshard(connection, 'abort-at-commit-target-layout', abort_at='commit_target_layout')

    log.debug('TEST: reshard bucket with EIO injected at do_reshard\n')
    test_bucket_reshard(connection, 'error-at-do-reshard', error_at='do_reshard')
    log.debug('TEST: reshard bucket with abort at do_reshard\n')
    test_bucket_reshard(connection, 'abort-at-do-reshard', abort_at='do_reshard')

    # TESTCASE 'versioning-reshard','bucket','reshard','versioning reshard','succeeds'
    log.debug(' test: reshard versioned bucket')
    num_shards_expected = get_bucket_stats(VER_BUCKET_NAME).num_shards + 1
    exec_cmd('radosgw-admin bucket reshard --bucket {} --num-shards {}'.format(VER_BUCKET_NAME,
                                                                              num_shards_expected))
    # check bucket shards num
    ver_bucket_stats = get_bucket_stats(VER_BUCKET_NAME)
    assert ver_bucket_stats.num_shards == num_shards_expected

    # TESTCASE 'check acl'
    new_bucket_acl = connection.BucketAcl(BUCKET_NAME).grants
    assert new_bucket_acl == bucket_acl
    new_ver_bucket_acl = connection.BucketAcl(VER_BUCKET_NAME).grants
    assert new_ver_bucket_acl == ver_bucket_acl

    # TESTCASE 'check reshard removes olh entries with empty name'
    log.debug(' test: reshard removes olh entries with empty name')
    bucket.objects.all().delete()

    # get name of shard 0 object, add a bogus olh entry with empty name
    bucket_shard0 = get_bucket_shard0(BUCKET_NAME)
    if 'CEPH_ROOT' in os.environ:
        k = '%s/qa/workunits/rgw/olh_noname_key' % os.environ['CEPH_ROOT']
        v = '%s/qa/workunits/rgw/olh_noname_val' % os.environ['CEPH_ROOT']
    else:
        # fall back to files in the current working directory
        k = 'olh_noname_key'
        v = 'olh_noname_val'
    exec_cmd('rados -p %s setomapval %s --omap-key-file %s < %s' % (INDEX_POOL, bucket_shard0, k, v))

    # check that bi list has one entry with empty name
    cmd = exec_cmd('radosgw-admin bi list --bucket %s' % BUCKET_NAME)
    json_op = json.loads(cmd.decode('utf-8', 'ignore')) # ignore utf-8 can't decode 0x80
    assert len(json_op) == 1
    assert json_op[0]['entry']['key']['name'] == ''

    # reshard to prune the bogus olh
    exec_cmd('radosgw-admin bucket reshard --bucket %s --num-shards %s --yes-i-really-mean-it' % (BUCKET_NAME, 1))

    # get that bi list has zero entries
    cmd = exec_cmd('radosgw-admin bi list --bucket %s' % BUCKET_NAME)
    json_op = json.loads(cmd.decode('utf-8', 'ignore')) # ignore utf-8 can't decode 0x80
    assert len(json_op) == 0

    # TESTCASE 'check that PUT succeeds during reshard'
    log.debug(' test: PUT succeeds during reshard')
    num_shards = get_bucket_stats(VER_BUCKET_NAME).num_shards
    # start a reshard that is delayed at do_reshard and do not wait for it,
    # so that the PUT below is issued while it is still in progress
    delayed_reshard_cmd = (
        'radosgw-admin --inject-delay-at=do_reshard --inject-delay-ms=5000 '
        'bucket reshard --bucket {} --num-shards {}'
    ).format(VER_BUCKET_NAME, num_shards + 1)
    exec_cmd(delayed_reshard_cmd, wait = False)
    time.sleep(1)
    ver_bucket.put_object(Key='put_during_reshard', Body=b"some_data")
    log.debug('put object successful')

    # TESTCASE 'check that bucket stats are correct after reshard with unlinked entries'
    log.debug('TEST: check that bucket stats are correct after reshard with unlinked entries\n')
    ver_bucket.object_versions.all().delete()
    ok_keys = ['a', 'b', 'c']
    unlinked_keys = ['x', 'y', 'z']
    put_objects(ver_bucket, ok_keys)
    create_unlinked_objects(connection, ver_bucket, unlinked_keys)
    exec_cmd(f'radosgw-admin bucket reshard --bucket {VER_BUCKET_NAME} --num-shards 17 --yes-i-really-mean-it')
    out = exec_cmd(f'radosgw-admin bucket check unlinked --bucket {VER_BUCKET_NAME} --fix --min-age-hours 0 --rgw-olh-pending-timeout-sec 0 --dump-keys')
    json_out = json.loads(out)
    assert len(json_out) == len(unlinked_keys)
    ver_bucket.object_versions.all().delete()
    out = exec_cmd(f'radosgw-admin bucket stats --bucket {VER_BUCKET_NAME}')
    json_out = json.loads(out)
    log.debug(json_out['usage'])
    assert json_out['usage']['rgw.main']['size'] == 0
    assert json_out['usage']['rgw.main']['num_objects'] == 0
    assert json_out['usage']['rgw.main']['size_actual'] == 0
    assert json_out['usage']['rgw.main']['size_kb'] == 0
    assert json_out['usage']['rgw.main']['size_kb_actual'] == 0
    assert json_out['usage']['rgw.main']['size_kb_utilized'] == 0

    # Clean up
    log.debug("Deleting bucket {}".format(BUCKET_NAME))
    bucket.objects.all().delete()
    bucket.delete()
    log.debug("Deleting bucket {}".format(VER_BUCKET_NAME))
    ver_bucket.object_versions.all().delete()
    ver_bucket.delete()
309
# Runs the whole suite at module level (there is no __main__ guard). The
# completion message is only logged if main() returns without raising.
main()
log.info("Completed resharding tests")