# Source: git.proxmox.com Git - ceph.git/blob - ceph/qa/workunits/rgw/test_rgw_reshard.py
# Blob: ab026c7ed77b63bc424ccdc20f4f6937019b6923
import errno
import json
import logging as log
import os
import subprocess
import time

import boto3
import botocore.exceptions
"""
Rgw manual and dynamic resharding testing against a running instance
"""
# The test cases in this file have been annotated for inventory.
# To extract the inventory (in csv format) use the command:
#
#   grep '^ *# TESTCASE' | sed 's/^ *# TESTCASE //'
# Show this script's own debug output as bare messages, and silence the
# HTTP/S3 client libraries below CRITICAL so they do not drown it out.
log.basicConfig(format='%(message)s', level=log.DEBUG)
log.getLogger('botocore').setLevel(log.CRITICAL)
log.getLogger('boto3').setLevel(log.CRITICAL)
log.getLogger('urllib3').setLevel(log.CRITICAL)
# Constants shared by the tests below.
# NOTE(review): USER is referenced by the user-creation command but its
# definition is missing from this listing; 'tester' is a reconstruction --
# confirm against the upstream file.
USER = 'tester'
DISPLAY_NAME = 'Testing'
ACCESS_KEY = 'NX5QOQKC6BH2IDN8HC7A'
SECRET_KEY = 'LnEsqNNqZIpkzauboDcLXLcYaWwLQ3Kop0zAnKIn'
BUCKET_NAME = 'a-bucket'
VER_BUCKET_NAME = 'myver'
INDEX_POOL = 'default.rgw.buckets.index'
def exec_cmd(cmd, **kwargs):
    """Run *cmd* through the shell and capture its standard output.

    Args:
        cmd: Command line, passed to ``subprocess.Popen`` with ``shell=True``.
        **kwargs: Extra ``Popen`` keyword arguments. ``shell`` and ``stdout``
            are always overridden. The pseudo-argument ``check_retcode``
            (default ``True``) is consumed here and not forwarded.

    Returns:
        With ``check_retcode`` true: the captured stdout as ``bytes``.
        With ``check_retcode`` false: a ``(stdout, returncode)`` tuple.

    Raises:
        AssertionError: if ``check_retcode`` is true and the command exits
            with a non-zero status.
    """
    check_retcode = kwargs.pop('check_retcode', True)
    kwargs['shell'] = True
    kwargs['stdout'] = subprocess.PIPE
    proc = subprocess.Popen(cmd, **kwargs)
    out, _ = proc.communicate()
    if check_retcode:
        # Callers that pass the result straight to json.loads() rely on
        # getting only the output, and on failures aborting the test.
        assert proc.returncode == 0, \
            'command failed with status {}: {}'.format(proc.returncode, cmd)
        return out
    return (out, proc.returncode)
class BucketStats:
    """Plain record of the statistics gathered for one bucket."""

    def __init__(self, bucket_name, bucket_id, num_objs=0, size_kb=0, num_shards=0):
        """Store the given values; a shard count of 0 or less is stored as 1."""
        self.bucket_name = bucket_name
        self.bucket_id = bucket_id
        self.num_objs = num_objs
        self.size_kb = size_kb
        self.num_shards = num_shards if num_shards > 0 else 1

    def get_num_shards(self):
        """Refresh ``num_shards`` via ``get_bucket_num_shards`` for this bucket."""
        self.num_shards = get_bucket_num_shards(self.bucket_name, self.bucket_id)
def get_bucket_stats(bucket_name):
    """
    function to get bucket stats

    Runs ``radosgw-admin bucket stats`` for *bucket_name*, parses the JSON
    output and returns a ``BucketStats`` built from its ``id``,
    ``num_shards`` and the ``rgw.main`` usage counters.
    """
    cmd = exec_cmd("radosgw-admin bucket stats --bucket {}".format(bucket_name))
    json_op = json.loads(cmd)
    bucket_id = json_op['id']
    num_shards = json_op['num_shards']
    # The usage section may be empty; fall back to zero counters so the
    # names below are always bound.
    main_usage = (json_op['usage'] or {}).get('rgw.main', {})
    num_objects = main_usage.get('num_objects', 0)
    size_kb = main_usage.get('size_kb', 0)
    log.debug(" \nBUCKET_STATS: \nbucket: {} id: {} num_objects: {} size_kb: {} num_shards: {}\n".format(bucket_name, bucket_id,
              num_objects, size_kb, num_shards))
    return BucketStats(bucket_name, bucket_id, num_objects, size_kb, num_shards)
def get_bucket_layout(bucket_name):
    """Return the parsed JSON output of ``radosgw-admin bucket layout`` for *bucket_name*."""
    res = exec_cmd("radosgw-admin bucket layout --bucket {}".format(bucket_name))
    return json.loads(res)
def get_bucket_shard0(bucket_name):
    """Build the name ``.dir.<bucket id>.<index gen>.0`` for *bucket_name*.

    The bucket id comes from ``get_bucket_stats`` and the generation from
    ``layout.current_index.gen`` in the output of ``get_bucket_layout``.
    """
    bucket_id = get_bucket_stats(bucket_name).bucket_id
    index_gen = get_bucket_layout(bucket_name)['layout']['current_index']['gen']
    return '.dir.%s.%d.0' % (bucket_id, index_gen)
def get_bucket_num_shards(bucket_name, bucket_id):
    """
    function to get bucket num shards

    Reads the ``bucket.instance:<name>:<id>`` metadata entry with
    ``radosgw-admin metadata get`` and returns its
    ``data.bucket_info.num_shards`` value.
    """
    metadata = 'bucket.instance:' + bucket_name + ':' + bucket_id
    cmd = exec_cmd('radosgw-admin metadata get {}'.format(metadata))
    json_op = json.loads(cmd)
    num_shards = json_op['data']['bucket_info']['num_shards']
    # BucketStats.get_num_shards() stores this result, so it must be returned.
    return num_shards
def run_bucket_reshard_cmd(bucket_name, num_shards, **kwargs):
    """Run ``radosgw-admin bucket reshard`` with optional fault injection.

    Args:
        bucket_name: Bucket to reshard.
        num_shards: Target shard count.
        **kwargs: ``error_at`` or ``abort_at`` (the former wins if both are
            given) and ``error_code`` are consumed to build injection
            flags; everything else is forwarded to ``exec_cmd``.

    Returns:
        Whatever ``exec_cmd`` returns for the assembled command.
    """
    cmd = 'radosgw-admin bucket reshard --bucket {} --num-shards {}'.format(bucket_name, num_shards)
    cmd += ' --rgw-reshard-bucket-lock-duration 30' # reduce to minimum
    if 'error_at' in kwargs:
        cmd += ' --inject-error-at {}'.format(kwargs.pop('error_at'))
    elif 'abort_at' in kwargs:
        cmd += ' --inject-abort-at {}'.format(kwargs.pop('abort_at'))
    if 'error_code' in kwargs:
        cmd += ' --inject-error-code {}'.format(kwargs.pop('error_code'))
    return exec_cmd(cmd, **kwargs)
def test_bucket_reshard(conn, name, **fault):
    """Reshard a fresh bucket with an injected fault, then verify recovery.

    Args:
        conn: Object providing ``create_bucket`` (the script passes the
            result of ``boto_connect``).
        name: Name of the bucket to create and reshard.
        **fault: Fault-injection options forwarded to
            ``run_bucket_reshard_cmd`` (``error_at``, ``abort_at``,
            ``error_code``).

    Raises:
        AssertionError: if any of the checks below fails.

    NOTE(review): several lines of this function are missing from the
    listing (object list setup, the ``else`` branch, the object delete, the
    wait/retry loop). They are reconstructed from the surviving comments
    and asserts -- confirm against the upstream file.
    """
    # create a bucket with non-default ACLs to verify that reshard preserves them
    bucket = conn.create_bucket(Bucket=name, ACL='authenticated-read')
    grants = bucket.Acl().grants

    # keep the uploaded objects so they can be deleted later
    objs = []
    for i in range(0, 20):
        objs += [bucket.put_object(Key='key' + str(i), Body=b"some_data")]

    old_shard_count = get_bucket_stats(name).num_shards
    num_shards_expected = old_shard_count + 1

    # try reshard with fault injection
    _, ret = run_bucket_reshard_cmd(name, num_shards_expected, check_retcode=False, **fault)

    if fault.get('error_code') == errno.ECANCELED:
        assert(ret == 0) # expect ECANCELED to retry and succeed
    else:
        assert(ret != 0 and ret != errno.EBUSY)

        # the failed reshard must not have changed the shard count
        cur_shard_count = get_bucket_stats(name).num_shards
        assert(cur_shard_count == old_shard_count)

        # verify that the bucket is writeable by deleting an object
        objs.pop().delete()

        assert grants == bucket.Acl().grants  # recheck grants after cancel

        # retry reshard without fault injection. if radosgw-admin aborted,
        # we'll have to retry until the reshard lock expires
        while True:
            _, ret = run_bucket_reshard_cmd(name, num_shards_expected, check_retcode=False)
            if ret == errno.EBUSY:
                log.info('waiting 30 seconds for reshard lock to expire...')
                time.sleep(30)
                continue
            assert(ret == 0)
            break

    # recheck shard count
    final_shard_count = get_bucket_stats(name).num_shards
    assert(final_shard_count == num_shards_expected)

    assert grants == bucket.Acl().grants  # recheck grants after commit

    # cleanup on resharded bucket must succeed
    bucket.delete_objects(Delete={'Objects': [{'Key': o.key} for o in objs]})
    # NOTE(review): the bucket delete is reconstructed (the same bucket name
    # is reused by later calls) -- confirm against the upstream file.
    bucket.delete()
# execute manual and dynamic resharding commands

# Create the test user. The exit status is checked here rather than inside
# exec_cmd so that an already-existing user (EEXIST) is tolerated.
_, ret = exec_cmd('radosgw-admin user create --uid {} --display-name {} --access-key {} --secret {}'.format(USER, DISPLAY_NAME, ACCESS_KEY, SECRET_KEY), check_retcode=False)
# The original `ret == 0 or errno.EEXIST` was always true, because the
# constant errno.EEXIST is non-zero; compare ret against it instead.
assert ret in (0, errno.EEXIST), 'user create failed with status {}'.format(ret)
def boto_connect(portnum, ssl, proto):
    """Open an S3 resource against localhost and check that it is reachable.

    Args:
        portnum: Port number as a string (it is concatenated into the URL).
        ssl: Whether the client should use SSL.
        proto: URL scheme, e.g. ``'http'`` or ``'https'``.

    Returns:
        The ``boto3`` S3 resource.

    Raises:
        botocore.exceptions.ConnectionError: if listing buckets fails to
            connect; callers catch this to try another port.

    NOTE(review): the lines closing the ``boto3.resource`` call, the
    ``try:`` line, the ``except`` body and the ``return`` are missing from
    the listing and are reconstructed. The original may pass further client
    options (for example certificate verification settings) -- confirm
    against the upstream file.
    """
    endpoint = proto + '://localhost:' + portnum
    conn = boto3.resource('s3',
                          aws_access_key_id=ACCESS_KEY,
                          aws_secret_access_key=SECRET_KEY,
                          use_ssl=ssl,
                          endpoint_url=endpoint,
                          )
    try:
        list(conn.buckets.limit(1)) # just verify we can list buckets
    except botocore.exceptions.ConnectionError as e:
        print(endpoint, e)
        raise
    print('connected to', endpoint)
    return conn
# NOTE(review): this section is reconstructed from a listing with missing
# lines; every reconstructed value is flagged where it appears.

# Connect to the gateway, trying the usual ports in turn.
try:
    connection = boto_connect('80', False, 'http')
except botocore.exceptions.ConnectionError:
    try: # retry on non-privileged http port
        connection = boto_connect('8000', False, 'http')
    except botocore.exceptions.ConnectionError:
        # retry with ssl
        connection = boto_connect('443', True, 'https')

# create buckets
bucket = connection.create_bucket(Bucket=BUCKET_NAME)
ver_bucket = connection.create_bucket(Bucket=VER_BUCKET_NAME)
# The original built BucketVersioning('ver_bucket') -- a literal that is not
# the bucket's name -- and never enabled it.
connection.BucketVersioning(VER_BUCKET_NAME).enable()

# Remember the ACL grants so they can be compared after resharding.
# (BucketAcl.load() returns None, which made the later comparison vacuous.)
bucket_acl = connection.BucketAcl(BUCKET_NAME).grants
ver_bucket_acl = connection.BucketAcl(VER_BUCKET_NAME).grants

# TESTCASE 'reshard-add','reshard','add','add bucket to resharding queue','succeeds'
log.debug('TEST: reshard add\n')

num_shards_expected = get_bucket_stats(BUCKET_NAME).num_shards + 1
cmd = exec_cmd('radosgw-admin reshard add --bucket {} --num-shards {}'.format(BUCKET_NAME, num_shards_expected))
cmd = exec_cmd('radosgw-admin reshard list')
json_op = json.loads(cmd)
log.debug('bucket name {}'.format(json_op[0]['bucket_name']))
assert json_op[0]['bucket_name'] == BUCKET_NAME
assert json_op[0]['tentative_new_num_shards'] == num_shards_expected

# TESTCASE 'reshard-process','reshard','','process bucket resharding','succeeds'
log.debug('TEST: reshard process\n')
cmd = exec_cmd('radosgw-admin reshard process')
# check bucket shards num (a mismatch is only logged, as in the original)
bucket_stats1 = get_bucket_stats(BUCKET_NAME)
if bucket_stats1.num_shards != num_shards_expected:
    log.error("Resharding failed on bucket {}. Expected number of shards are not created\n".format(BUCKET_NAME))

# TESTCASE 'reshard-add','reshard','add','add non empty bucket to resharding queue','succeeds'
log.debug('TEST: reshard add non empty bucket\n')
# create objs
# NOTE(review): the assignment of num_objs is missing from the listing;
# 8 is a reconstruction -- confirm against the upstream file.
num_objs = 8
for i in range(0, num_objs):
    connection.Object(BUCKET_NAME, ('key'+str(i))).put(Body=b"some_data")

num_shards_expected = get_bucket_stats(BUCKET_NAME).num_shards + 1
cmd = exec_cmd('radosgw-admin reshard add --bucket {} --num-shards {}'.format(BUCKET_NAME, num_shards_expected))
cmd = exec_cmd('radosgw-admin reshard list')
json_op = json.loads(cmd)
assert json_op[0]['bucket_name'] == BUCKET_NAME
assert json_op[0]['tentative_new_num_shards'] == num_shards_expected

# TESTCASE 'reshard process','reshard','process','reshard non empty bucket','succeeds'
log.debug('TEST: reshard process non empty bucket\n')
cmd = exec_cmd('radosgw-admin reshard process')
# check bucket shards num (a mismatch is only logged, as in the original)
bucket_stats1 = get_bucket_stats(BUCKET_NAME)
if bucket_stats1.num_shards != num_shards_expected:
    log.error("Resharding failed on bucket {}. Expected number of shards are not created\n".format(BUCKET_NAME))

# TESTCASE 'manual bucket resharding','inject error','fail','check bucket accessibility', 'retry reshard'
log.debug('TEST: reshard bucket with EIO injected at set_target_layout\n')
test_bucket_reshard(connection, 'error-at-set-target-layout', error_at='set_target_layout')
log.debug('TEST: reshard bucket with ECANCELED injected at set_target_layout\n')
test_bucket_reshard(connection, 'error-at-set-target-layout', error_at='set_target_layout', error_code=errno.ECANCELED)
log.debug('TEST: reshard bucket with abort at set_target_layout\n')
test_bucket_reshard(connection, 'abort-at-set-target-layout', abort_at='set_target_layout')

log.debug('TEST: reshard bucket with EIO injected at block_writes\n')
test_bucket_reshard(connection, 'error-at-block-writes', error_at='block_writes')
log.debug('TEST: reshard bucket with abort at block_writes\n')
test_bucket_reshard(connection, 'abort-at-block-writes', abort_at='block_writes')

log.debug('TEST: reshard bucket with EIO injected at commit_target_layout\n')
test_bucket_reshard(connection, 'error-at-commit-target-layout', error_at='commit_target_layout')
log.debug('TEST: reshard bucket with ECANCELED injected at commit_target_layout\n')
test_bucket_reshard(connection, 'error-at-commit-target-layout', error_at='commit_target_layout', error_code=errno.ECANCELED)
log.debug('TEST: reshard bucket with abort at commit_target_layout\n')
test_bucket_reshard(connection, 'abort-at-commit-target-layout', abort_at='commit_target_layout')

log.debug('TEST: reshard bucket with EIO injected at do_reshard\n')
test_bucket_reshard(connection, 'error-at-do-reshard', error_at='do_reshard')
log.debug('TEST: reshard bucket with abort at do_reshard\n')
test_bucket_reshard(connection, 'abort-at-do-reshard', abort_at='do_reshard')

# TESTCASE 'versioning reshard','bucket','reshard','versioning reshard','succeeds'
log.debug(' test: reshard versioned bucket')
num_shards_expected = get_bucket_stats(VER_BUCKET_NAME).num_shards + 1
cmd = exec_cmd('radosgw-admin bucket reshard --bucket {} --num-shards {}'.format(VER_BUCKET_NAME,
                                                                             num_shards_expected))
# check bucket shards num
ver_bucket_stats = get_bucket_stats(VER_BUCKET_NAME)
assert ver_bucket_stats.num_shards == num_shards_expected

# TESTCASE 'check acl'
new_bucket_acl = connection.BucketAcl(BUCKET_NAME).grants
assert new_bucket_acl == bucket_acl
new_ver_bucket_acl = connection.BucketAcl(VER_BUCKET_NAME).grants
assert new_ver_bucket_acl == ver_bucket_acl

# TESTCASE 'check reshard removes olh entries with empty name'
log.debug(' test: reshard removes olh entries with empty name')
bucket.objects.all().delete()

# get name of shard 0 object, add a bogus olh entry with empty name
bucket_shard0 = get_bucket_shard0(BUCKET_NAME)
if 'CEPH_ROOT' in os.environ:
    k = '%s/qa/workunits/rgw/olh_noname_key' % os.environ['CEPH_ROOT']
    v = '%s/qa/workunits/rgw/olh_noname_val' % os.environ['CEPH_ROOT']
else:
    # NOTE(review): the fallback branch is missing from the listing; using
    # the bare file names is a reconstruction -- confirm upstream.
    k = 'olh_noname_key'
    v = 'olh_noname_val'
exec_cmd('rados -p %s setomapval %s --omap-key-file %s < %s' % (INDEX_POOL, bucket_shard0, k, v))

# check that bi list has one entry with empty name
cmd = exec_cmd('radosgw-admin bi list --bucket %s' % BUCKET_NAME)
json_op = json.loads(cmd.decode('utf-8', 'ignore')) # ignore utf-8 can't decode 0x80
assert len(json_op) == 1
assert json_op[0]['entry']['key']['name'] == ''

# reshard to prune the bogus olh
cmd = exec_cmd('radosgw-admin bucket reshard --bucket %s --num-shards %s --yes-i-really-mean-it' % (BUCKET_NAME, 1))

# check that bi list has zero entries
cmd = exec_cmd('radosgw-admin bi list --bucket %s' % BUCKET_NAME)
json_op = json.loads(cmd.decode('utf-8', 'ignore')) # ignore utf-8 can't decode 0x80
assert len(json_op) == 0

# Clean up
# NOTE(review): the two bucket deletes are missing from the listing and are
# reconstructed from the "Deleting bucket" log lines -- confirm upstream.
log.debug("Deleting bucket {}".format(BUCKET_NAME))
bucket.objects.all().delete()
bucket.delete()
log.debug("Deleting bucket {}".format(VER_BUCKET_NAME))
ver_bucket.delete()

log.info("Completed resharding tests")