]>
Commit | Line | Data |
---|---|---|
11fdf7f2 TL |
1 | #!/usr/bin/env bash |
2 | # | |
3 | # Copyright (C) 2018 Red Hat <contact@redhat.com> | |
4 | # | |
5 | # Author: David Zafman <dzafman@redhat.com> | |
6 | # | |
7 | # This program is free software; you can redistribute it and/or modify | |
8 | # it under the terms of the GNU Library Public License as published by | |
9 | # the Free Software Foundation; either version 2, or (at your option) | |
10 | # any later version. | |
11 | # | |
12 | # This program is distributed in the hope that it will be useful, | |
13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | # GNU Library Public License for more details. | |
16 | # | |
17 | ||
18 | source $CEPH_ROOT/qa/standalone/ceph-helpers.sh | |
19 | ||
# Entry point used by the standalone test harness.
#
# Arguments: $1   - scratch directory handed to setup/teardown and each test
#            $2.. - optional names of the test functions to run; when none
#                   are given every defined function whose name matches
#                   TEST_[0-9a-z_]* is run
# Globals:   exports CEPH_MON, CEPH_ARGS, objects and poolprefix
# Returns:   1 as soon as setup, a test or teardown fails, 0 otherwise
function run() {
    local dir=$1
    shift

    export CEPH_MON="127.0.0.1:7180" # git grep '\<7180\>' : there must be only one
    export CEPH_ARGS
    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
    CEPH_ARGS+="--mon-host=$CEPH_MON "
    CEPH_ARGS+="--osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10 "
    CEPH_ARGS+="--fake_statfs_for_testing=3686400 "
    CEPH_ARGS+="--osd_max_backfills=10 "
    export objects=600
    export poolprefix=test

    # Fall back to every TEST_* function known to the shell when the
    # caller did not name any.
    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
    local func
    # $funcs is split on purpose: it is a whitespace separated name list.
    for func in $funcs ; do
        setup "$dir" || return 1
        "$func" "$dir" || return 1
        teardown "$dir" || return 1
    done
}
41 | ||
42 | ||
# Print the number of PGs whose state string contains $1.
#
# Arguments: $1 - substring to look for in each PG's "state" field
# Outputs:   the count, as printed by jq, on stdout
function get_num_in_state() {
    local state=$1
    # The state is handed to jq as a variable rather than spliced into the
    # filter text, so quotes or backslashes in it cannot break the program.
    ceph --format json pg dump pgs 2>/dev/null | \
        jq --arg state "$state" \
           '.pg_stats | [.[] | .state | select(contains($state))] | length'
}
50 | ||
51 | ||
# Wait until no PG reports a state containing $1.
#
# The retry budget comes from get_timeout_delays and is restarted every
# time the number of PGs in the state changes, so only a stalled count
# leads to a timeout.
#
# Arguments: $1 - state substring to wait out (e.g. backfilling)
#            $2 - timeout handed to get_timeout_delays
# Returns:   0 once the count reaches 0; 1 if flush_pg_stats fails or the
#            count stops changing for the whole delay schedule (a pg dump
#            is printed in that case)
function wait_for_state() {
    local state=$1
    local timeout=$2
    local num_in_state=-1
    local cur_in_state
    # Deliberately unquoted: the helper prints a whitespace separated list
    # and a missing timeout must stay a missing argument.
    local -a delays=($(get_timeout_delays $timeout 5))
    local -i loop=0

    flush_pg_stats || return 1
    # Do not start polling before at least one PG exists.
    while test "$(get_num_pgs)" == 0 ; do
        sleep 1
    done

    while true ; do
        cur_in_state=$(get_num_in_state "${state}")
        if test "$cur_in_state" = "0" ; then
            break
        fi
        if test "$cur_in_state" != "$num_in_state" ; then
            # Progress was made: restart the delay schedule.
            loop=0
            num_in_state=$cur_in_state
        elif (( loop >= ${#delays[@]} )) ; then
            ceph pg dump pgs
            return 1
        fi
        sleep "${delays[$loop]}"
        loop=$(( loop + 1 ))
    done
    return 0
}
79 | ||
80 | ||
# Wait until no PG is in the "backfilling" state.
# $1 is the timeout passed straight through to wait_for_state, whose
# exit status becomes this function's status.
function wait_for_backfill() {
    wait_for_state backfilling $1
}
85 | ||
86 | ||
# Wait until no PG is in the "activating" state.
# $1 is the timeout passed straight through to wait_for_state, whose
# exit status becomes this function's status.
function wait_for_active() {
    wait_for_state activating $1
}
91 | ||
92 | # All tests are created in an environment which has fake total space | |
93 | # of 3600K (3686400) which can hold 600 6K replicated objects or | |
94 | # 200 18K shards of erasure coded objects. For a k=3, m=2 EC pool | |
95 | # we have a theoretical 54K object but with the chunk size of 4K | |
96 | # and a rounding of 4K to account for the chunks is 36K max object | |
97 | # which is ((36K / 3) + 4K) * 200 = 3200K which is 88% of | |
98 | # 3600K for a shard. | |
99 | ||
100 | # Create 2 pools with size 1 | |
101 | # Write enough data that only 1 pool pg can fit per osd | |
102 | # Increase the pool size to 2 | |
103 | # On 3 OSDs this should result in 1 OSD with overlapping replicas, | |
104 | # so both pools can't fit. We assume pgid 1.0 and 2.0 won't | |
105 | # map to the same 2 OSDs. | |
106 | # At least 1 pool shouldn't have room to backfill | |
107 | # All other pools should go active+clean | |
# Arguments: $1 - scratch directory holding this test's cluster
# Returns:   0 when exactly one PG line shows +backfill_toofull,
#            (pools - 1) PG lines show active+clean and no OSD log
#            contains "num_bytes mismatch"; 1 otherwise
function TEST_backfill_test_simple() {
    local dir=$1
    local pools=2
    local OSDS=3
    local osd p o i
    local ERRORS expected

    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    export CEPH_ARGS

    for (( osd = 0; osd < OSDS; osd++ )); do
        run_osd "$dir" "$osd" || return 1
    done

    ceph osd set-backfillfull-ratio .85

    for (( p = 1; p <= pools; p++ )); do
        create_pool "${poolprefix}$p" 1 1
        ceph osd pool set "${poolprefix}$p" size 1
    done

    wait_for_clean || return 1

    # This won't work if the primary (and only) OSD of both pools
    # is the same.

    dd if=/dev/urandom of="$dir/datafile" bs=1024 count=4
    for (( o = 1; o <= objects; o++ )); do
        for (( p = 1; p <= pools; p++ )); do
            rados -p "${poolprefix}$p" put "obj$o" "$dir/datafile"
        done
    done

    ceph pg dump pgs

    # Growing the pools to size 2 is what triggers the backfill.
    for (( p = 1; p <= pools; p++ )); do
        ceph osd pool set "${poolprefix}$p" size 2
    done
    sleep 5

    wait_for_backfill 240 || return 1
    wait_for_active 60 || return 1

    ERRORS=0
    # grep -c prints a bare number, unlike "wc -l" which pads on some systems.
    if [ "$(ceph pg dump pgs | grep -c +backfill_toofull)" != "1" ]; then
        echo "One pool should have been in backfill_toofull"
        ERRORS=$(( ERRORS + 1 ))
    fi

    expected=$(( pools - 1 ))
    if [ "$(ceph pg dump pgs | grep -c active+clean)" != "$expected" ]; then
        echo "$expected didn't finish backfill"
        ERRORS=$(( ERRORS + 1 ))
    fi

    ceph pg dump pgs

    if (( ERRORS != 0 )); then
        return 1
    fi

    for (( i = 1; i <= pools; i++ )); do
        delete_pool "${poolprefix}$i"
    done
    kill_daemons "$dir" || return 1
    ! grep -q "num_bytes mismatch" "$dir"/osd.*.log || return 1
}
183 | ||
184 | ||
185 | # Create 8 pools of size 1 on 20 OSDs | |
186 | # Write 4K * 600 objects (only 1 pool pg can fit on any given osd) | |
187 | # Increase pool size to 2 | |
188 | # At least 1 pool shouldn't have room to backfill | |
189 | # All other pools should go active+clean | |
# Arguments: $1 - scratch directory holding this test's cluster
# Returns:   0 when at least one PG line shows +backfill_toofull, every
#            remaining pool's PG shows active+clean and no OSD log
#            contains "num_bytes mismatch"; 1 otherwise
function TEST_backfill_test_multi() {
    local dir=$1
    local pools=8
    local OSDS=20
    local osd p o i
    local ERRORS full expected

    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    export CEPH_ARGS

    for (( osd = 0; osd < OSDS; osd++ )); do
        run_osd "$dir" "$osd" || return 1
    done

    ceph osd set-backfillfull-ratio .85

    for (( p = 1; p <= pools; p++ )); do
        create_pool "${poolprefix}$p" 1 1
        ceph osd pool set "${poolprefix}$p" size 1
    done

    wait_for_clean || return 1

    dd if=/dev/urandom of="$dir/datafile" bs=1024 count=4
    for (( o = 1; o <= objects; o++ )); do
        for (( p = 1; p <= pools; p++ )); do
            rados -p "${poolprefix}$p" put "obj$o" "$dir/datafile"
        done
    done

    ceph pg dump pgs

    # Growing the pools to size 2 is what triggers the backfill.
    for (( p = 1; p <= pools; p++ )); do
        ceph osd pool set "${poolprefix}$p" size 2
    done
    sleep 5

    wait_for_backfill 240 || return 1
    wait_for_active 60 || return 1

    ERRORS=0
    full=$(ceph pg dump pgs | grep -c +backfill_toofull)
    if (( full < 1 )); then
        echo "At least one pool should have been in backfill_toofull"
        ERRORS=$(( ERRORS + 1 ))
    fi

    # Every pool that is not too full must have completed its backfill.
    expected=$(( pools - full ))
    if [ "$(ceph pg dump pgs | grep -c active+clean)" != "$expected" ]; then
        echo "$expected didn't finish backfill"
        ERRORS=$(( ERRORS + 1 ))
    fi

    ceph pg dump pgs

    if (( ERRORS != 0 )); then
        return 1
    fi

    for (( i = 1; i <= pools; i++ )); do
        delete_pool "${poolprefix}$i"
    done
    # Work around for http://tracker.ceph.com/issues/38195
    kill_daemons "$dir" #|| return 1
    ! grep -q "num_bytes mismatch" "$dir"/osd.*.log || return 1
}
264 | ||
265 | ||
266 | # To make sure that when 2 pg try to backfill at the same time to | |
267 | # the same target. This might be covered by the simple test above | |
268 | # but this makes sure we get it. | |
269 | # | |
270 | # Create 10 pools of size 2 and identify 2 that have the same | |
271 | # non-primary osd. | |
272 | # Delete all other pools | |
273 | # Set size to 1 and write 4K * 600 to each pool | |
274 | # Set size back to 2 | |
275 | # The 2 pools should race to backfill. | |
276 | # One pool goes active+clean | |
277 | # The other goes active+...+backfill_toofull | |
# Arguments: $1 - scratch directory holding this test's cluster
# Returns:   0 when, of the two selected pools, exactly one PG line shows
#            +backfill_toofull and exactly one shows active+clean, and no
#            OSD log contains "num_bytes mismatch"; 1 otherwise (including
#            when no suitable pair of pools can be found)
function TEST_backfill_test_sametarget() {
    local dir=$1
    local pools=10
    local OSDS=5
    local osd p i
    local ERRORS

    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    export CEPH_ARGS

    for (( osd = 0; osd < OSDS; osd++ )); do
        run_osd "$dir" "$osd" || return 1
    done

    ceph osd set-backfillfull-ratio .85

    for (( p = 1; p <= pools; p++ )); do
        create_pool "${poolprefix}$p" 1 1
        ceph osd pool set "${poolprefix}$p" size 2
    done
    sleep 5

    wait_for_clean || return 1

    ceph pg dump pgs

    # Find 2 pools whose PG has distinct primaries but the second
    # replica on the same osd.
    local POOLNUM1
    local pool1
    local chk_osd1
    local chk_osd2

    local POOLNUM2
    local pool2
    local test_osd1 test_osd2
    for (( p = 1; p <= pools; p++ )); do
        ceph pg map "${p}.0" --format=json | jq '.acting[]' > "$dir/acting"
        # First line of the acting list vs. its last line.
        test_osd1=$(head -n 1 "$dir/acting")
        test_osd2=$(tail -n 1 "$dir/acting")
        if (( p == 1 )); then
            # Pool 1 is the reference every later pool is compared with.
            POOLNUM1=$p
            pool1="${poolprefix}$p"
            chk_osd1=$test_osd1
            chk_osd2=$test_osd2
        elif [[ "$chk_osd1" != "$test_osd1" && "$chk_osd2" == "$test_osd2" ]]; then
            POOLNUM2=$p
            pool2="${poolprefix}$p"
            break
        fi
    done
    rm -f "$dir/acting"

    if [[ -z "$pool2" ]]; then
        echo "Failure to find appropirate PGs"
        return 1
    fi

    # Only the two selected pools are kept.
    for (( p = 1; p <= pools; p++ )); do
        if (( p != POOLNUM1 && p != POOLNUM2 )); then
            delete_pool "${poolprefix}$p"
        fi
    done

    ceph osd pool set "$pool1" size 1
    ceph osd pool set "$pool2" size 1

    wait_for_clean || return 1

    dd if=/dev/urandom of="$dir/datafile" bs=1024 count=4
    for (( i = 1; i <= objects; i++ )); do
        rados -p "$pool1" put "obj$i" "$dir/datafile"
        rados -p "$pool2" put "obj$i" "$dir/datafile"
    done

    ceph osd pool set "$pool1" size 2
    ceph osd pool set "$pool2" size 2
    sleep 5

    wait_for_backfill 240 || return 1
    wait_for_active 60 || return 1

    ERRORS=0
    if [ "$(ceph pg dump pgs | grep -c +backfill_toofull)" != "1" ]; then
        echo "One pool should have been in backfill_toofull"
        ERRORS=$(( ERRORS + 1 ))
    fi

    if [ "$(ceph pg dump pgs | grep -c active+clean)" != "1" ]; then
        echo "One didn't finish backfill"
        ERRORS=$(( ERRORS + 1 ))
    fi

    ceph pg dump pgs

    if (( ERRORS != 0 )); then
        return 1
    fi

    delete_pool "$pool1"
    delete_pool "$pool2"
    kill_daemons "$dir" || return 1
    ! grep -q "num_bytes mismatch" "$dir"/osd.*.log || return 1
}
396 | ||
397 | # 2 pools can't both backfill to a target which has other data | |
398 | # 1 of the pools has objects that increase from 1024 to 2611 bytes | |
399 | # | |
400 | # Write to fill pool which is size 1 | |
401 | # Take fill pool osd down (other 2 pools must go to the remaining OSDs) | |
402 | # Save an export of data on fill OSD and restart it | |
403 | # Write an initial 1K to pool1 which has pg 2.0 | |
404 | # Export 2.0 from non-fillpool OSD don't wait for it to start-up | |
405 | # Take down fillpool OSD | |
406 | # Put 1K object version of 2.0 on fillpool OSD | |
407 | # Put back fillpool data on fillpool OSD | |
408 | # With fillpool down write 2611 byte objects | |
409 | # Take down $osd and bring back $fillosd simultaneously | |
410 | # Wait for backfilling | |
411 | # PG 2.0 will be able to backfill its remaining data | |
412 | # PG 3.0 must get backfill_toofull | |
# Arguments: $1 - scratch directory holding this test's cluster
# Returns:   0 when PG 3.0 shows +backfill_toofull, PG 2.0 shows
#            active+clean and no OSD log contains "num_bytes mismatch";
#            1 otherwise
function TEST_backfill_multi_partial() {
    local dir=$1
    local pools=2
    local OSDS=3
    local osd p o i
    local fillosd
    local ERRORS

    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    export CEPH_ARGS

    for (( osd = 0; osd < OSDS; osd++ )); do
        run_osd "$dir" "$osd" || return 1
    done

    ceph osd set-backfillfull-ratio .85

    ceph osd set-require-min-compat-client luminous
    create_pool fillpool 1 1
    ceph osd pool set fillpool size 1
    for (( p = 1; p <= pools; p++ )); do
        create_pool "${poolprefix}$p" 1 1
        ceph osd pool set "${poolprefix}$p" size 2
    done

    wait_for_clean || return 1

    # Partially fill an osd
    # We have room for 600 6K replicated objects, if we create 2611 byte objects
    # there is 3600K - (2611 * 600) = 2070K, so the fill pool and one
    # replica from the other 2 is 85% of 3600K

    dd if=/dev/urandom of="$dir/datafile" bs=2611 count=1
    for (( o = 1; o <= objects; o++ )); do
        rados -p fillpool put "obj-fill-${o}" "$dir/datafile"
    done

    fillosd=$(get_primary fillpool obj-fill-1)
    # $osd is the OSD following $fillosd, wrapping around to 0.
    osd=$(( fillosd + 1 ))
    if (( osd == OSDS )); then
        osd=0
    fi

    sleep 5
    kill "$(cat "$dir/osd.$fillosd.pid")"
    ceph osd out "osd.$fillosd"
    sleep 2

    _objectstore_tool_nodown "$dir" "$fillosd" --op export-remove --pgid 1.0 --file "$dir/fillexport.out" || return 1
    activate_osd "$dir" "$fillosd" || return 1

    ceph pg dump pgs

    dd if=/dev/urandom of="$dir/datafile" bs=1024 count=1
    for (( o = 1; o <= objects; o++ )); do
        rados -p "${poolprefix}1" put "obj-1-${o}" "$dir/datafile"
    done

    ceph pg dump pgs
    # The $osd OSD is started, but we don't wait so we can kill $fillosd at the same time
    _objectstore_tool_nowait "$dir" "$osd" --op export --pgid 2.0 --file "$dir/export.out"
    kill "$(cat "$dir/osd.$fillosd.pid")"
    sleep 5
    # The status of the forced remove is deliberately not checked.
    _objectstore_tool_nodown "$dir" "$fillosd" --force --op remove --pgid 2.0
    _objectstore_tool_nodown "$dir" "$fillosd" --op import --pgid 2.0 --file "$dir/export.out" || return 1
    _objectstore_tool_nodown "$dir" "$fillosd" --op import --pgid 1.0 --file "$dir/fillexport.out" || return 1
    ceph pg dump pgs
    sleep 20
    ceph pg dump pgs

    # re-write everything
    dd if=/dev/urandom of="$dir/datafile" bs=2611 count=1
    for (( o = 1; o <= objects; o++ )); do
        for (( p = 1; p <= pools; p++ )); do
            rados -p "${poolprefix}$p" put "obj-${p}-${o}" "$dir/datafile"
        done
    done

    kill "$(cat "$dir/osd.$osd.pid")"
    ceph osd out "osd.$osd"

    activate_osd "$dir" "$fillosd" || return 1
    ceph osd in "osd.$fillosd"
    sleep 15

    wait_for_backfill 240 || return 1
    wait_for_active 60 || return 1

    flush_pg_stats || return 1
    ceph pg dump pgs

    ERRORS=0
    if [ "$(ceph pg dump pgs | grep "^3.0" | grep -c +backfill_toofull)" != "1" ]; then
        echo "PG 3.0 should be in backfill_toofull"
        ERRORS=$(( ERRORS + 1 ))
    fi

    if [ "$(ceph pg dump pgs | grep "^2.0" | grep -c active+clean)" != "1" ]; then
        echo "PG 2.0 should have completed backfill"
        ERRORS=$(( ERRORS + 1 ))
    fi

    if (( ERRORS != 0 )); then
        return 1
    fi

    delete_pool fillpool
    for (( i = 1; i <= pools; i++ )); do
        delete_pool "${poolprefix}$i"
    done
    kill_daemons "$dir" || return 1
    ! grep -q "num_bytes mismatch" "$dir"/osd.*.log || return 1
}
535 | ||
536 | # Make sure that the amount of bytes already on the replica doesn't | |
537 | # cause an out of space condition | |
538 | # | |
539 | # Create 1 pool and write 4K * 600 objects | |
540 | # Remove 25% (150) of the objects with one OSD down (noout set) | |
541 | # Increase the size of the remaining 75% (450) of the objects to 6K | |
542 | # Bring back down OSD | |
543 | # The pool should go active+clean | |
# Arguments: $1 - scratch directory holding this test's cluster
# Returns:   0 when the pool becomes clean again after the stopped OSD is
#            reactivated and no OSD log contains "num_bytes mismatch";
#            1 otherwise
function TEST_backfill_grow() {
    local dir=$1
    local poolname="test"
    local OSDS=3
    local osd i
    local primary otherosd rmobjects

    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1

    for (( osd = 0; osd < OSDS; osd++ )); do
        run_osd "$dir" "$osd" || return 1
    done

    ceph osd set-backfillfull-ratio .85

    create_pool "$poolname" 1 1
    ceph osd pool set "$poolname" size 3
    sleep 5

    wait_for_clean || return 1

    dd if=/dev/urandom of="${dir}/4kdata" bs=1k count=4
    for (( i = 1; i <= objects; i++ )); do
        rados -p "$poolname" put "obj$i" "$dir/4kdata"
    done

    # Remember primary during the backfill
    primary=$(get_primary "$poolname" obj1)
    otherosd=$(get_not_primary "$poolname" obj1)

    ceph osd set noout
    kill_daemons "$dir" TERM "$otherosd" || return 1

    # While $otherosd is down: drop the first quarter of the objects ...
    rmobjects=$(( objects / 4 ))
    for (( i = 1; i <= rmobjects; i++ )); do
        rados -p "$poolname" rm "obj$i"
    done

    # ... and overwrite the remaining ones with 6K of data.
    dd if=/dev/urandom of="${dir}/6kdata" bs=6k count=1
    for (( i = rmobjects + 1; i <= objects; i++ )); do
        rados -p "$poolname" put "obj$i" "$dir/6kdata"
    done

    activate_osd "$dir" "$otherosd" || return 1

    ceph tell "osd.$primary" debug kick_recovery_wq 0

    sleep 2

    wait_for_clean || return 1

    delete_pool "$poolname"
    kill_daemons "$dir" || return 1
    ! grep -q "num_bytes mismatch" "$dir"/osd.*.log || return 1
}
603 | ||
604 | # Create a 5 shard EC pool on 6 OSD cluster | |
605 | # Fill 1 OSD with 2600K of data take that osd down. | |
606 | # Write the EC pool on 5 OSDs | |
607 | # Take down 1 (must contain an EC shard) | |
608 | # Bring up OSD with fill data | |
609 | # Not enough room to backfill to partially full OSD | |
# Arguments: $1 - scratch directory holding this test's cluster
# Returns:   0 when exactly one PG line other than 1.0 shows
#            +backfill_toofull; 1 otherwise
function TEST_ec_backfill_simple() {
    local dir=$1
    local pools=1
    local OSDS=6
    local k=3
    local m=2
    local ecobjects=$(( objects / k ))
    local osd p o i
    local fillosd
    local ERRORS

    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    export CEPH_ARGS

    for (( osd = 0; osd < OSDS; osd++ )); do
        run_osd "$dir" "$osd" || return 1
    done

    ceph osd set-backfillfull-ratio .85
    create_pool fillpool 1 1
    ceph osd pool set fillpool size 1

    # Partially fill an osd
    # We have room for 200 18K replicated objects, if we create 13K objects
    # there is only 3600K - (13K * 200) = 1000K which won't hold
    # a k=3 shard below ((18K / 3) + 4K) * 200 = 2000K
    # Actual usage per shard is 8K * 200 = 1600K because 18K/3 is 6K which
    # rounds to 8K. The 2000K is the ceiling on the 18K * 200 = 3600K logical
    # bytes in the pool.
    dd if=/dev/urandom of="$dir/datafile" bs=1024 count=13
    for (( o = 1; o <= ecobjects; o++ )); do
        rados -p fillpool put "obj$o" "$dir/datafile"
    done

    fillosd=$(get_primary fillpool obj1)
    # $osd is the OSD following $fillosd, wrapping around to 0.
    osd=$(( fillosd + 1 ))
    if (( osd == OSDS )); then
        osd=0
    fi

    sleep 5
    kill "$(cat "$dir/osd.$fillosd.pid")"
    ceph osd out "osd.$fillosd"
    sleep 2
    ceph osd erasure-code-profile set ec-profile k=$k m=$m crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1

    for (( p = 1; p <= pools; p++ )); do
        ceph osd pool create "${poolprefix}$p" 1 1 erasure ec-profile
    done

    # Can't wait for clean here because we created a stale pg
    #wait_for_clean || return 1
    sleep 5

    ceph pg dump pgs

    dd if=/dev/urandom of="$dir/datafile" bs=1024 count=18
    for (( o = 1; o <= ecobjects; o++ )); do
        for (( p = 1; p <= pools; p++ )); do
            rados -p "${poolprefix}$p" put "obj$o" "$dir/datafile"
        done
    done

    kill "$(cat "$dir/osd.$osd.pid")"
    ceph osd out "osd.$osd"

    activate_osd "$dir" "$fillosd" || return 1
    ceph osd in "osd.$fillosd"
    sleep 30

    ceph pg dump pgs

    wait_for_backfill 240 || return 1
    wait_for_active 60 || return 1

    ceph pg dump pgs

    ERRORS=0
    # PG 1.0 is skipped when counting.
    if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep -c +backfill_toofull)" != "1" ]; then
        echo "One pool should have been in backfill_toofull"
        ERRORS=$(( ERRORS + 1 ))
    fi

    if (( ERRORS != 0 )); then
        return 1
    fi

    delete_pool fillpool
    for (( i = 1; i <= pools; i++ )); do
        delete_pool "${poolprefix}$i"
    done
    kill_daemons "$dir" || return 1
}
709 | ||
# Print the OSD ids 0 .. ($1 - 1) on one line, separated by single
# spaces, leaving out the id given as $2.
#
# Arguments: $1 - number of OSDs
#            $2 - OSD id to leave out (an empty value excludes nothing)
# Outputs:   the remaining ids on stdout (an empty line if none remain)
function osdlist() {
    local OSDS=$1
    local excludeosd=$2
    local osd
    local osds=""

    for (( osd = 0; osd < OSDS; osd++ )); do
        # Quoted string comparison: an empty $excludeosd simply never matches.
        if [[ "$osd" == "$excludeosd" ]]; then
            continue
        fi
        # ${osds:+ } adds the separator only once the list is non-empty.
        osds+="${osds:+ }${osd}"
    done
    echo "$osds"
}
728 | ||
729 | # Create a pool with size 1 and fill with data so that only 1 EC shard can fit. | |
730 | # Write data to 2 EC pools mapped to the same OSDs (excluding filled one) | |
731 | # Remap the last OSD to partially full OSD on both pools | |
732 | # The 2 pools should race to backfill. | |
733 | # One pool goes active+clean | |
734 | # The other goes active+...+backfill_toofull | |
# Arguments: $1 - scratch directory holding this test's cluster
# Returns:   0 when, ignoring PG 1.0, exactly one PG line shows
#            +backfill_toofull and exactly one shows active+clean;
#            1 otherwise
function TEST_ec_backfill_multi() {
    local dir=$1
    local pools=2
    local OSDS=6
    local k=3
    local m=2
    local ecobjects=$(( objects / k ))
    local osd p o i
    local fillosd nonfillosds
    local ERRORS

    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    export CEPH_ARGS

    for (( osd = 0; osd < OSDS; osd++ )); do
        run_osd "$dir" "$osd" || return 1
    done

    # This test requires that a shard from either of 2 different pools
    # fits on a given OSD, but both together will not fit. The fillosd
    # plus 1 shard is made to use 75% of the space, leaving not enough
    # for a second shard to stay under the 85% set here.
    ceph osd set-backfillfull-ratio .85

    ceph osd set-require-min-compat-client luminous
    create_pool fillpool 1 1
    ceph osd pool set fillpool size 1

    # Partially fill an osd
    # We have room for 200 18K replicated objects, if we create 9K objects
    # there is only 3600K - (9K * 200) = 1800K which will only hold
    # one k=3 shard below ((12K / 3) + 4K) * 200 = 1600K
    # The actual data will be (12K / 3) * 200 = 800K because the extra
    # is the reservation padding for chunking.
    dd if=/dev/urandom of="$dir/datafile" bs=1024 count=9
    for (( o = 1; o <= ecobjects; o++ )); do
        rados -p fillpool put "obj$o" "$dir/datafile"
    done

    fillosd=$(get_primary fillpool obj1)
    ceph osd erasure-code-profile set ec-profile k=$k m=$m crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1

    nonfillosds="$(osdlist "$OSDS" "$fillosd")"

    for (( p = 1; p <= pools; p++ )); do
        ceph osd pool create "${poolprefix}$p" 1 1 erasure ec-profile
        # $nonfillosds is split on purpose: one argument per OSD id.
        ceph osd pg-upmap "$(( p + 1 )).0" $nonfillosds
    done

    # Can't wait for clean here because we created a stale pg
    #wait_for_clean || return 1
    sleep 15

    ceph pg dump pgs

    dd if=/dev/urandom of="$dir/datafile" bs=1024 count=12
    for (( o = 1; o <= ecobjects; o++ )); do
        for (( p = 1; p <= pools; p++ )); do
            rados -p "${poolprefix}$p" put "obj$o-$p" "$dir/datafile"
        done
    done

    ceph pg dump pgs

    # Replace the last OSD of the list with $fillosd in both upmaps
    # (${nonfillosds% *} drops the final id; split on purpose).
    for (( p = 1; p <= pools; p++ )); do
        ceph osd pg-upmap "$(( p + 1 )).0" ${nonfillosds% *} "$fillosd"
    done

    sleep 10

    wait_for_backfill 240 || return 1
    wait_for_active 60 || return 1

    ceph pg dump pgs

    ERRORS=0
    if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep -c +backfill_toofull)" != "1" ]; then
        echo "One pool should have been in backfill_toofull"
        ERRORS=$(( ERRORS + 1 ))
    fi

    if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep -c active+clean)" != "1" ]; then
        echo "One didn't finish backfill"
        ERRORS=$(( ERRORS + 1 ))
    fi

    if (( ERRORS != 0 )); then
        return 1
    fi

    delete_pool fillpool
    for (( i = 1; i <= pools; i++ )); do
        delete_pool "${poolprefix}$i"
    done
    kill_daemons "$dir" || return 1
}
840 | ||
841 | # Similar to TEST_ec_backfill_multi but one of the ec pools | |
842 | # already had some data on the target OSD | |
843 | ||
844 | # Create a pool with size 1 and fill with data so that only 1 EC shard can fit. | |
845 | # Write a small amount of data to 1 EC pool that still includes the filled one | |
846 | # Take down fillosd with noout set | |
847 | # Write data to 2 EC pools mapped to the same OSDs (excluding filled one) | |
848 | # Remap the last OSD to partially full OSD on both pools | |
849 | # The 2 pools should race to backfill. | |
850 | # One pool goes active+clean | |
851 | # The other goes active+...+backfill_toofull | |
# Arguments: $1 - scratch directory holding this test's cluster
# Returns:   0 when, ignoring PG 1.0, exactly one PG line shows
#            +backfill_toofull and exactly one shows active+clean;
#            1 otherwise
# NOTE(review): the SKIP_ prefix keeps this out of the TEST_* pattern that
# run() uses to discover tests, so it only runs when named explicitly.
function SKIP_TEST_ec_backfill_multi_partial() {
    local dir=$1
    local pools=2
    local OSDS=5
    local k=3
    local m=2
    local ecobjects=$(( objects / k ))
    local lastosd=$(( OSDS - 1 ))
    local osd p o i
    local fillosd nonfillosds
    local ERRORS

    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    export CEPH_ARGS

    for (( osd = 0; osd < OSDS; osd++ )); do
        run_osd "$dir" "$osd" || return 1
    done

    # This test requires that a shard from either of 2 different pools
    # fits on a given OSD, but both together will not fit. The fillosd
    # plus 1 shard is made to use 75% of the space, leaving not enough
    # for a second shard to stay under the 85% set here.
    ceph osd set-backfillfull-ratio .85

    ceph osd set-require-min-compat-client luminous
    create_pool fillpool 1 1
    ceph osd pool set fillpool size 1
    # last osd
    ceph osd pg-upmap 1.0 "$lastosd"

    # Partially fill an osd
    # We have room for 200 18K replicated objects, if we create 9K objects
    # there is only 3600K - (9K * 200) = 1800K which will only hold
    # one k=3 shard below ((12K / 3) + 4K) * 200 = 1600K
    # The actual data will be (12K / 3) * 200 = 800K because the extra
    # is the reservation padding for chunking.
    dd if=/dev/urandom of="$dir/datafile" bs=1024 count=9
    for (( o = 1; o <= ecobjects; o++ )); do
        rados -p fillpool put "obj$o" "$dir/datafile"
    done

    fillosd=$(get_primary fillpool obj1)
    ceph osd erasure-code-profile set ec-profile k=3 m=2 crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1

    # Computed as in the original; not referenced again in this function.
    nonfillosds="$(osdlist "$OSDS" "$fillosd")"

    for (( p = 1; p <= pools; p++ )); do
        ceph osd pool create "${poolprefix}$p" 1 1 erasure ec-profile
        # seq output is split on purpose: one argument per OSD id.
        ceph osd pg-upmap "$(( p + 1 )).0" $(seq 0 "$lastosd")
    done

    # Can't wait for clean here because we created a stale pg
    #wait_for_clean || return 1
    sleep 15

    ceph pg dump pgs

    dd if=/dev/urandom of="$dir/datafile" bs=1024 count=1
    for (( o = 1; o <= ecobjects; o++ )); do
        rados -p "${poolprefix}1" put "obj$o-1" "$dir/datafile"
    done

    # Drop $lastosd from both upmaps before the large writes.
    for (( p = 1; p <= pools; p++ )); do
        ceph osd pg-upmap "$(( p + 1 )).0" $(seq 0 $(( lastosd - 1 )))
    done
    ceph pg dump pgs

    #ceph osd set noout
    #kill_daemons $dir TERM osd.$lastosd || return 1

    dd if=/dev/urandom of="$dir/datafile" bs=1024 count=12
    for (( o = 1; o <= ecobjects; o++ )); do
        for (( p = 1; p <= pools; p++ )); do
            rados -p "${poolprefix}$p" put "obj$o-$p" "$dir/datafile"
        done
    done

    ceph pg dump pgs

    # Now backfill lastosd by adding back into the upmap
    for (( p = 1; p <= pools; p++ )); do
        ceph osd pg-upmap "$(( p + 1 )).0" $(seq 0 "$lastosd")
    done
    #activate_osd $dir $lastosd || return 1
    #ceph tell osd.0 debug kick_recovery_wq 0

    sleep 10
    ceph pg dump pgs

    wait_for_backfill 240 || return 1
    wait_for_active 60 || return 1

    ceph pg dump pgs

    ERRORS=0
    if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep -c +backfill_toofull)" != "1" ]; then
        echo "One pool should have been in backfill_toofull"
        ERRORS=$(( ERRORS + 1 ))
    fi

    if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep -c active+clean)" != "1" ]; then
        echo "One didn't finish backfill"
        ERRORS=$(( ERRORS + 1 ))
    fi

    if (( ERRORS != 0 )); then
        return 1
    fi

    delete_pool fillpool
    for (( i = 1; i <= pools; i++ )); do
        delete_pool "${poolprefix}$i"
    done
    kill_daemons "$dir" || return 1
}
979 | ||
# Disabled multi-pool EC backfill test.
#
# The name starts with SKIP_ instead of TEST_, so the auto-discovery in
# run() (which only collects functions matching ^TEST_) never selects it;
# it only runs when its name is passed explicitly on the command line.
#
# Scenario:
#   1. Partially fill one OSD through a size-1 replicated pool, then stop
#      that OSD and mark it out.
#   2. Create $pools EC pools (k=3 m=2) and write $objects 12K objects to
#      each of them.
#   3. Stop and mark out the next OSD, export PG 2.0 from it and import
#      that PG into the partially filled OSD.
#   4. Restart the filled OSD, mark it in and wait for backfill to settle.
#   5. Expect exactly one PG (ignoring 1.0, the fill pool) to be stuck in
#      backfill_toofull and exactly one to be active+clean.
#
# Globals:   objects, poolprefix, CEPH_ARGS (read)
# Arguments: $1 - test directory (any further arguments are ignored)
# Returns:   0 when both expectations hold, 1 otherwise or on setup failure
function SKIP_TEST_ec_backfill_multi_partial() {
    local dir=$1
    local pools=2
    local OSDS=6
    # Keep loop counters and scratch state out of the global scope.
    local osd fillosd o p i
    local ERRORS=0

    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1
    export CEPH_ARGS

    for osd in $(seq 0 $((OSDS - 1)))
    do
        run_osd "$dir" "$osd" || return 1
    done

    # Below we need to fit 3200K in 3600K which is 88%
    # so set to 90%
    ceph osd set-backfillfull-ratio .90

    ceph osd set-require-min-compat-client luminous
    create_pool fillpool 1 1
    ceph osd pool set fillpool size 1

    # Partially fill an osd
    # We have room for 200 48K ec objects, if we create 4k replicated objects
    # there is 3600K - (4K * 200) = 2800K which won't hold 2 k=3 shard
    # of 200 12K objects which takes ((12K / 3) + 4K) * 200 = 1600K each.
    # On the other OSDs 2 * 1600K = 3200K which is 88% of 3600K.
    # NOTE(review): the arithmetic above assumes 200 objects, but the loops
    # below iterate over $objects (exported as 600 by run()) -- confirm the
    # sizing before re-enabling this test.
    dd if=/dev/urandom of="$dir/datafile" bs=1024 count=4
    for o in $(seq 1 "$objects")
    do
        rados -p fillpool put "obj$o" "$dir/datafile"
    done

    # Declaration is separate from assignment so a failing lookup is not
    # masked by the exit status of 'local'.
    fillosd=$(get_primary fillpool obj1) || return 1
    [[ $fillosd =~ ^[0-9]+$ ]] || return 1
    # The OSD after the filled one, wrapping around to osd.0.
    osd=$(( (fillosd + 1) % OSDS ))

    sleep 5
    kill "$(cat "$dir/osd.$fillosd.pid")"
    ceph osd out "osd.$fillosd"
    sleep 2
    ceph osd erasure-code-profile set ec-profile k=3 m=2 crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1

    for p in $(seq 1 "$pools")
    do
        ceph osd pool create "${poolprefix}$p" 1 1 erasure ec-profile || return 1
    done

    # Can't wait for clean here because we created a stale pg
    #wait_for_clean || return 1
    sleep 5

    ceph pg dump pgs

    dd if=/dev/urandom of="$dir/datafile" bs=1024 count=12
    for o in $(seq 1 "$objects")
    do
        for p in $(seq 1 "$pools")
        do
            rados -p "${poolprefix}$p" put "obj$o" "$dir/datafile"
        done
    done

    #ceph pg map 2.0 --format=json | jq '.'
    kill "$(cat "$dir/osd.$osd.pid")"
    ceph osd out "osd.$osd"

    # Move PG 2.0 onto the partially filled OSD.  A failed export must not
    # be followed by an import of a missing or truncated file.
    _objectstore_tool_nodown "$dir" "$osd" --op export --pgid 2.0 --file "$dir/export.out" || return 1
    _objectstore_tool_nodown "$dir" "$fillosd" --op import --pgid 2.0 --file "$dir/export.out" || return 1

    activate_osd "$dir" "$fillosd" || return 1
    ceph osd in "osd.$fillosd"
    sleep 15

    wait_for_backfill 240 || return 1
    wait_for_active 60 || return 1

    # PG 1.0 belongs to fillpool and is excluded from both checks.  The dot
    # is escaped so that only pgid 1.0 is filtered (an unescaped dot would
    # also drop pgids such as 100.0).
    if [ "$(ceph pg dump pgs | grep -v '^1\.0' | grep -c '+backfill_toofull')" != "1" ];
    then
        echo "One pool should have been in backfill_toofull"
        ERRORS=$((ERRORS + 1))
    fi

    if [ "$(ceph pg dump pgs | grep -v '^1\.0' | grep -c 'active+clean')" != "1" ];
    then
        echo "One didn't finish backfill"
        ERRORS=$((ERRORS + 1))
    fi

    ceph pg dump pgs

    if (( ERRORS != 0 ));
    then
        return 1
    fi

    delete_pool fillpool
    for i in $(seq 1 "$pools")
    do
        delete_pool "${poolprefix}$i"
    done
    kill_daemons "$dir" || return 1
}
1087 | ||
# Create 1 EC pool (k=3, m=2)
# Write 200 12K objects: ((12K / 3) + 4K) * 200 = 1600K
# Take 1 shard's OSD down (with noout set)
# Remove 50 objects: ((12K / 3) + 4K) * 50 = 400K
# Rewrite the other 150 objects as 36K objects (grow 150 objects): 2400K
# But there is already 1600K usage so backfill
# would be too full if it didn't account for existing data
# Bring the down OSD back up so it must backfill
# It should go active+clean taking into account data already there
#
# The object counts above follow from objects=600 as exported by run():
# ecobjects = objects / k and rmobjects = ecobjects / 4.
#
# Globals:   objects (read)
# Arguments: $1 - test directory
# Returns:   0 when the pool becomes clean again, 1 on any checked failure
function TEST_ec_backfill_grow() {
    local dir=$1
    local poolname="test"
    local OSDS=6
    local k=3
    local m=2
    local ecobjects=$((objects / k))
    # A quarter of the objects are removed while the shard's OSD is down.
    local rmobjects=$((ecobjects / 4))
    local primary otherosd osd i

    run_mon "$dir" a || return 1
    run_mgr "$dir" x || return 1

    for osd in $(seq 0 $((OSDS - 1)))
    do
        run_osd "$dir" "$osd" || return 1
    done

    ceph osd set-backfillfull-ratio .85

    ceph osd set-require-min-compat-client luminous
    ceph osd erasure-code-profile set ec-profile k=$k m=$m crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1
    ceph osd pool create "$poolname" 1 1 erasure ec-profile || return 1

    wait_for_clean || return 1

    dd if=/dev/urandom of="${dir}/12kdata" bs=1k count=12
    for i in $(seq 1 "$ecobjects")
    do
        rados -p "$poolname" put "obj$i" "$dir/12kdata"
    done

    # Remember primary during the backfill.  Assignments are kept apart from
    # 'local' so that a failing lookup is not masked.
    primary=$(get_primary "$poolname" obj1) || return 1
    otherosd=$(get_not_primary "$poolname" obj1) || return 1

    # noout keeps the stopped OSD "in" while it is down.
    ceph osd set noout
    # NOTE(review): the daemon selector is passed as a bare OSD number here,
    # exactly as before -- confirm kill_daemons accepts that form.
    kill_daemons "$dir" TERM "$otherosd" || return 1

    for i in $(seq 1 "$rmobjects")
    do
        rados -p "$poolname" rm "obj$i"
    done

    # Grow every surviving object from 12K to 36K.
    dd if=/dev/urandom of="${dir}/36kdata" bs=1k count=36
    for i in $(seq $((rmobjects + 1)) "$ecobjects")
    do
        rados -p "$poolname" put "obj$i" "$dir/36kdata"
    done

    activate_osd "$dir" "$otherosd" || return 1

    ceph tell "osd.$primary" debug kick_recovery_wq 0

    sleep 2

    wait_for_clean || return 1

    delete_pool "$poolname"
    kill_daemons "$dir" || return 1
}
1158 | ||
# Script entry point: pass the suite name and all command-line arguments to main (not defined in this file; presumably provided by the sourced ceph-helpers.sh -- confirm).
1159 | main osd-backfill-space "$@" | |
1160 | ||
1161 | # Local Variables: | |
1162 | # compile-command: "make -j4 && ../qa/run-standalone.sh osd-backfill-space.sh" | |
1163 | # End: |