#!/usr/bin/python3

import errno
import logging as log
import time
import subprocess
import json
import boto3
import botocore.exceptions
import os

"""
RGW manual and dynamic resharding tests against a running instance
"""
# The test cases in this file have been annotated for inventory.
# To extract the inventory (in CSV format) use the command:
#
# grep '^ *# TESTCASE' | sed 's/^ *# TESTCASE //'
#
#

log.basicConfig(format='%(message)s', level=log.DEBUG)
log.getLogger('botocore').setLevel(log.CRITICAL)
log.getLogger('boto3').setLevel(log.CRITICAL)
log.getLogger('urllib3').setLevel(log.CRITICAL)

""" Constants """
USER = 'tester'
DISPLAY_NAME = 'Testing'
ACCESS_KEY = 'NX5QOQKC6BH2IDN8HC7A'
SECRET_KEY = 'LnEsqNNqZIpkzauboDcLXLcYaWwLQ3Kop0zAnKIn'
BUCKET_NAME = 'a-bucket'
VER_BUCKET_NAME = 'myver'
INDEX_POOL = 'default.rgw.buckets.index'

def exec_cmd(cmd, **kwargs):
    check_retcode = kwargs.pop('check_retcode', True)
    kwargs['shell'] = True
    kwargs['stdout'] = subprocess.PIPE
    proc = subprocess.Popen(cmd, **kwargs)
    log.info(proc.args)
    out, _ = proc.communicate()
    if check_retcode:
        assert(proc.returncode == 0)
        return out
    return (out, proc.returncode)

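# NOTE: exec_cmd() returns the raw stdout bytes when check_retcode is left True
# (the default), and a (stdout, returncode) tuple when called with
# check_retcode=False, e.g. (illustrative usage only):
#   out, ret = exec_cmd('radosgw-admin bucket list', check_retcode=False)
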
class BucketStats:
    def __init__(self, bucket_name, bucket_id, num_objs=0, size_kb=0, num_shards=0):
        self.bucket_name = bucket_name
        self.bucket_id = bucket_id
        self.num_objs = num_objs
        self.size_kb = size_kb
        self.num_shards = num_shards if num_shards > 0 else 1

    def get_num_shards(self):
        self.num_shards = get_bucket_num_shards(self.bucket_name, self.bucket_id)


def get_bucket_stats(bucket_name):
    """
    function to get bucket stats
    """
    cmd = exec_cmd("radosgw-admin bucket stats --bucket {}".format(bucket_name))
    json_op = json.loads(cmd)
    #print(json.dumps(json_op, indent = 4, sort_keys=True))
    bucket_id = json_op['id']
    num_shards = json_op['num_shards']
    if len(json_op['usage']) > 0:
        num_objects = json_op['usage']['rgw.main']['num_objects']
        size_kb = json_op['usage']['rgw.main']['size_kb']
    else:
        num_objects = 0
        size_kb = 0
    log.debug(" \nBUCKET_STATS: \nbucket: {} id: {} num_objects: {} size_kb: {} num_shards: {}\n".format(bucket_name, bucket_id,
              num_objects, size_kb, num_shards))
    return BucketStats(bucket_name, bucket_id, num_objects, size_kb, num_shards)

def get_bucket_layout(bucket_name):
    res = exec_cmd("radosgw-admin bucket layout --bucket {}".format(bucket_name))
    return json.loads(res)

def get_bucket_shard0(bucket_name):
    bucket_id = get_bucket_stats(bucket_name).bucket_id
    index_gen = get_bucket_layout(bucket_name)['layout']['current_index']['gen']
    return '.dir.%s.%d.0' % (bucket_id, index_gen)
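
# Bucket index shard objects in the index pool are named
# ".dir.<bucket_id>.<index_generation>.<shard_number>", so get_bucket_shard0()
# returns the name of shard 0 for the current index generation; the olh test in
# main() writes a bogus omap entry directly into that object.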

def get_bucket_num_shards(bucket_name, bucket_id):
    """
    function to get bucket num shards
    """
    metadata = 'bucket.instance:' + bucket_name + ':' + bucket_id
    cmd = exec_cmd('radosgw-admin metadata get {}'.format(metadata))
    json_op = json.loads(cmd)
    num_shards = json_op['data']['bucket_info']['num_shards']
    return num_shards
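
# Bucket instance metadata is addressed as "bucket.instance:<bucket_name>:<bucket_id>";
# 'radosgw-admin metadata get' on that key exposes bucket_info.num_shards directly.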

def run_bucket_reshard_cmd(bucket_name, num_shards, **kwargs):
    cmd = 'radosgw-admin bucket reshard --bucket {} --num-shards {}'.format(bucket_name, num_shards)
    cmd += ' --rgw-reshard-bucket-lock-duration 30' # reduce to minimum
    if 'error_at' in kwargs:
        cmd += ' --inject-error-at {}'.format(kwargs.pop('error_at'))
    elif 'abort_at' in kwargs:
        cmd += ' --inject-abort-at {}'.format(kwargs.pop('abort_at'))
    if 'error_code' in kwargs:
        cmd += ' --inject-error-code {}'.format(kwargs.pop('error_code'))
    return exec_cmd(cmd, **kwargs)

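# run_bucket_reshard_cmd() relies on radosgw-admin's --inject-error-at,
# --inject-abort-at and --inject-error-code debug options to force a reshard to
# fail or abort at a named step. test_bucket_reshard() uses that to verify that a
# failed reshard is rolled back cleanly (shard count and ACL grants unchanged,
# bucket still writeable) and that a later retry without the fault brings the
# bucket to the expected shard count.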
def test_bucket_reshard(conn, name, **fault):
    # create a bucket with non-default ACLs to verify that reshard preserves them
    bucket = conn.create_bucket(Bucket=name, ACL='authenticated-read')
    grants = bucket.Acl().grants

    objs = []
    try:
        # create objs
        for i in range(0, 20):
            objs += [bucket.put_object(Key='key' + str(i), Body=b"some_data")]

        old_shard_count = get_bucket_stats(name).num_shards
        num_shards_expected = old_shard_count + 1

        # try reshard with fault injection
        _, ret = run_bucket_reshard_cmd(name, num_shards_expected, check_retcode=False, **fault)

        if fault.get('error_code') == errno.ECANCELED:
            assert(ret == 0) # expect ECANCELED to retry and succeed
        else:
            assert(ret != 0 and ret != errno.EBUSY)

            # check shard count
            cur_shard_count = get_bucket_stats(name).num_shards
            assert(cur_shard_count == old_shard_count)

            # verify that the bucket is writeable by deleting an object
            objs.pop().delete()

            assert grants == bucket.Acl().grants # recheck grants after cancel

            # retry reshard without fault injection. if radosgw-admin aborted,
            # we'll have to retry until the reshard lock expires
            while True:
                _, ret = run_bucket_reshard_cmd(name, num_shards_expected, check_retcode=False)
                if ret == errno.EBUSY:
                    log.info('waiting 30 seconds for reshard lock to expire...')
                    time.sleep(30)
                    continue
                assert(ret == 0)
                break

        # recheck shard count
        final_shard_count = get_bucket_stats(name).num_shards
        assert(final_shard_count == num_shards_expected)

        assert grants == bucket.Acl().grants # recheck grants after commit
    finally:
        # cleanup on resharded bucket must succeed
        bucket.delete_objects(Delete={'Objects':[{'Key':o.key} for o in objs]})
        bucket.delete()


def main():
    """
    execute manual and dynamic resharding commands
    """
    # create user
    _, ret = exec_cmd('radosgw-admin user create --uid {} --display-name {} --access-key {} --secret {}'.format(USER, DISPLAY_NAME, ACCESS_KEY, SECRET_KEY), check_retcode=False)
    assert(ret == 0 or ret == errno.EEXIST)

    def boto_connect(portnum, ssl, proto):
        endpoint = proto + '://localhost:' + portnum
        conn = boto3.resource('s3',
                              aws_access_key_id=ACCESS_KEY,
                              aws_secret_access_key=SECRET_KEY,
                              use_ssl=ssl,
                              endpoint_url=endpoint,
                              verify=False,
                              config=None,
                              )
        try:
            list(conn.buckets.limit(1)) # just verify we can list buckets
        except botocore.exceptions.ConnectionError as e:
            print(e)
            raise
        print('connected to', endpoint)
        return conn

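    # Probe likely endpoints in order: privileged http (80), a common
    # non-privileged http port (8000), then https (443); which one answers
    # depends on how the gateway under test was started.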
    try:
        connection = boto_connect('80', False, 'http')
    except botocore.exceptions.ConnectionError:
        try: # retry on non-privileged http port
            connection = boto_connect('8000', False, 'http')
        except botocore.exceptions.ConnectionError:
            # retry with ssl
            connection = boto_connect('443', True, 'https')

    # create a bucket
    bucket = connection.create_bucket(Bucket=BUCKET_NAME)
    ver_bucket = connection.create_bucket(Bucket=VER_BUCKET_NAME)
    connection.BucketVersioning(VER_BUCKET_NAME).enable()

    bucket_acl = connection.BucketAcl(BUCKET_NAME).grants
    ver_bucket_acl = connection.BucketAcl(VER_BUCKET_NAME).grants

    # TESTCASE 'reshard-add','reshard','add','add bucket to resharding queue','succeeds'
    log.debug('TEST: reshard add\n')

    num_shards_expected = get_bucket_stats(BUCKET_NAME).num_shards + 1
    cmd = exec_cmd('radosgw-admin reshard add --bucket {} --num-shards {}'.format(BUCKET_NAME, num_shards_expected))
    cmd = exec_cmd('radosgw-admin reshard list')
    json_op = json.loads(cmd)
    log.debug('bucket name {}'.format(json_op[0]['bucket_name']))
    assert json_op[0]['bucket_name'] == BUCKET_NAME
    assert json_op[0]['tentative_new_num_shards'] == num_shards_expected

    # TESTCASE 'reshard-process','reshard','','process bucket resharding','succeeds'
    log.debug('TEST: reshard process\n')
    cmd = exec_cmd('radosgw-admin reshard process')
    time.sleep(5)
    # check bucket shards num
    bucket_stats1 = get_bucket_stats(BUCKET_NAME)
    if bucket_stats1.num_shards != num_shards_expected:
        log.error("Resharding failed on bucket {}. Expected number of shards was not created\n".format(BUCKET_NAME))

    # TESTCASE 'reshard-add','reshard','add','add non-empty bucket to resharding queue','succeeds'
    log.debug('TEST: reshard add non-empty bucket\n')
    # create objs
    num_objs = 8
    for i in range(0, num_objs):
        connection.Object(BUCKET_NAME, ('key' + str(i))).put(Body=b"some_data")

    num_shards_expected = get_bucket_stats(BUCKET_NAME).num_shards + 1
    cmd = exec_cmd('radosgw-admin reshard add --bucket {} --num-shards {}'.format(BUCKET_NAME, num_shards_expected))
    cmd = exec_cmd('radosgw-admin reshard list')
    json_op = json.loads(cmd)
    assert json_op[0]['bucket_name'] == BUCKET_NAME
    assert json_op[0]['tentative_new_num_shards'] == num_shards_expected

    # TESTCASE 'reshard-process','reshard','process','reshard non-empty bucket','succeeds'
    log.debug('TEST: reshard process non-empty bucket\n')
    cmd = exec_cmd('radosgw-admin reshard process')
    # check bucket shards num
    bucket_stats1 = get_bucket_stats(BUCKET_NAME)
    if bucket_stats1.num_shards != num_shards_expected:
        log.error("Resharding failed on bucket {}. Expected number of shards was not created\n".format(BUCKET_NAME))

    # TESTCASE 'manual bucket resharding','inject error','fail','check bucket accessibility', 'retry reshard'
    log.debug('TEST: reshard bucket with EIO injected at set_target_layout\n')
    test_bucket_reshard(connection, 'error-at-set-target-layout', error_at='set_target_layout')
    log.debug('TEST: reshard bucket with ECANCELED injected at set_target_layout\n')
    test_bucket_reshard(connection, 'error-at-set-target-layout', error_at='set_target_layout', error_code=errno.ECANCELED)
    log.debug('TEST: reshard bucket with abort at set_target_layout\n')
    test_bucket_reshard(connection, 'abort-at-set-target-layout', abort_at='set_target_layout')

    log.debug('TEST: reshard bucket with EIO injected at block_writes\n')
    test_bucket_reshard(connection, 'error-at-block-writes', error_at='block_writes')
    log.debug('TEST: reshard bucket with abort at block_writes\n')
    test_bucket_reshard(connection, 'abort-at-block-writes', abort_at='block_writes')

    log.debug('TEST: reshard bucket with EIO injected at commit_target_layout\n')
    test_bucket_reshard(connection, 'error-at-commit-target-layout', error_at='commit_target_layout')
    log.debug('TEST: reshard bucket with ECANCELED injected at commit_target_layout\n')
    test_bucket_reshard(connection, 'error-at-commit-target-layout', error_at='commit_target_layout', error_code=errno.ECANCELED)
    log.debug('TEST: reshard bucket with abort at commit_target_layout\n')
    test_bucket_reshard(connection, 'abort-at-commit-target-layout', abort_at='commit_target_layout')

    log.debug('TEST: reshard bucket with EIO injected at do_reshard\n')
    test_bucket_reshard(connection, 'error-at-do-reshard', error_at='do_reshard')
    log.debug('TEST: reshard bucket with abort at do_reshard\n')
    test_bucket_reshard(connection, 'abort-at-do-reshard', abort_at='do_reshard')

    # TESTCASE 'versioning reshard','bucket','reshard','versioning reshard','succeeds'
    log.debug(' test: reshard versioned bucket')
    num_shards_expected = get_bucket_stats(VER_BUCKET_NAME).num_shards + 1
    cmd = exec_cmd('radosgw-admin bucket reshard --bucket {} --num-shards {}'.format(VER_BUCKET_NAME,
                                                                                     num_shards_expected))
    # check bucket shards num
    ver_bucket_stats = get_bucket_stats(VER_BUCKET_NAME)
    assert ver_bucket_stats.num_shards == num_shards_expected

    # TESTCASE 'check acl'
    new_bucket_acl = connection.BucketAcl(BUCKET_NAME).grants
    assert new_bucket_acl == bucket_acl
    new_ver_bucket_acl = connection.BucketAcl(VER_BUCKET_NAME).grants
    assert new_ver_bucket_acl == ver_bucket_acl

    # TESTCASE 'check reshard removes olh entries with empty name'
    log.debug(' test: reshard removes olh entries with empty name')
    bucket.objects.all().delete()


    # get name of shard 0 object, add a bogus olh entry with empty name
    bucket_shard0 = get_bucket_shard0(BUCKET_NAME)
    if 'CEPH_ROOT' in os.environ:
        k = '%s/qa/workunits/rgw/olh_noname_key' % os.environ['CEPH_ROOT']
        v = '%s/qa/workunits/rgw/olh_noname_val' % os.environ['CEPH_ROOT']
    else:
        k = 'olh_noname_key'
        v = 'olh_noname_val'
    exec_cmd('rados -p %s setomapval %s --omap-key-file %s < %s' % (INDEX_POOL, bucket_shard0, k, v))
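    # olh_noname_key/olh_noname_val hold a pre-built bucket-index omap entry whose
    # object name is empty (the key includes bytes that are not valid utf-8, hence
    # the decode(..., 'ignore') on the 'bi list' output below); planting it directly
    # in shard 0 simulates a leftover olh entry that reshard is expected to remove.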

    # check that bi list has one entry with empty name
    cmd = exec_cmd('radosgw-admin bi list --bucket %s' % BUCKET_NAME)
    json_op = json.loads(cmd.decode('utf-8', 'ignore')) # ignore utf-8 can't decode 0x80
    assert len(json_op) == 1
    assert json_op[0]['entry']['key']['name'] == ''

    # reshard to prune the bogus olh
    cmd = exec_cmd('radosgw-admin bucket reshard --bucket %s --num-shards %s --yes-i-really-mean-it' % (BUCKET_NAME, 1))

    # check that bi list now has zero entries
    cmd = exec_cmd('radosgw-admin bi list --bucket %s' % BUCKET_NAME)
    json_op = json.loads(cmd.decode('utf-8', 'ignore')) # ignore utf-8 can't decode 0x80
    assert len(json_op) == 0

    # Clean up
    log.debug("Deleting bucket {}".format(BUCKET_NAME))
    bucket.objects.all().delete()
    bucket.delete()
    log.debug("Deleting bucket {}".format(VER_BUCKET_NAME))
    ver_bucket.delete()


main()
log.info("Completed resharding tests")