import new upstream nautilus stable release 14.2.8

[ceph.git] / ceph / qa / standalone / ceph-helpers.sh
diff --git a/ceph/qa/standalone/ceph-helpers.sh b/ceph/qa/standalone/ceph-helpers.sh

index 52b0eee691714fd6a9d7bbf39a5d68613084cf83..386f95fb57116b71f3fad2f5d7fd9fb5a72805f5 100755 (executable)
--- a/ceph/qa/standalone/ceph-helpers.sh
+++ b/ceph/qa/standalone/ceph-helpers.sh
@@ -293,8 +293,6 @@ function test_kill_daemon() {
          kill_daemon $pidfile TERM || return 1
      done
  
-    ceph osd dump | grep "osd.0 down" || return 1
-
      name_prefix=mgr
      for pidfile in $(find $dir 2>/dev/null | grep $name_prefix'[^/]*\.pid') ; do
          #
@@ -381,7 +379,6 @@ function test_kill_daemons() {
      # killing just the osd and verify the mon still is responsive
      #
      kill_daemons $dir TERM osd || return 1
-    ceph osd dump | grep "osd.0 down" || return 1
      #
      # kill the mgr
      #
@@ -780,6 +777,7 @@ function destroy_osd() {
  
      ceph osd out osd.$id || return 1
      kill_daemons $dir TERM osd.$id || return 1
+    ceph osd down osd.$id || return 1
      ceph osd purge osd.$id --yes-i-really-mean-it || return 1
      teardown $dir/$id || return 1
      rm -fr $dir/$id
@@ -930,8 +928,10 @@ function test_wait_for_osd() {
      run_mon $dir a --osd_pool_default_size=1 || return 1
      run_mgr $dir x || return 1
      run_osd $dir 0 || return 1
+    run_osd $dir 1 || return 1
      wait_for_osd up 0 || return 1
-    kill_daemons $dir TERM osd || return 1
+    wait_for_osd up 1 || return 1
+    kill_daemons $dir TERM osd.0 || return 1
      wait_for_osd down 0 || return 1
      ( TIMEOUT=1 ; ! wait_for_osd up 0 ) || return 1
      teardown $dir || return 1
@@ -1313,6 +1313,36 @@ function test_get_num_active_clean() {
      teardown $dir || return 1
  }
  
+##
+# Return the number of active or peered PGs in the cluster. A PG matches if
+# ceph pg dump pgs reports it is either **active** or **peered** and is
+# not **stale**.
+#
+# @param STDOUT the number of active or peered PGs
+# @return 0 on success, 1 on error
+#
+function get_num_active_or_peered() {
+    local expression
+    expression+="select(contains(\"active\") or contains(\"peered\")) | "
+    expression+="select(contains(\"stale\") | not)"
+    ceph --format json pg dump pgs 2>/dev/null | \
+        jq ".pg_stats | [.[] | .state | $expression] | length"
+}
+
+function test_get_num_active_or_peered() {
+    local dir=$1
+
+    setup $dir || return 1
+    run_mon $dir a --osd_pool_default_size=1 || return 1
+    run_mgr $dir x || return 1
+    run_osd $dir 0 || return 1
+    create_rbd_pool || return 1
+    wait_for_clean || return 1
+    local num_peered=$(get_num_active_or_peered)
+    test "$num_peered" = $PG_NUM || return 1
+    teardown $dir || return 1
+}
+
  #######################################################################
  
  ##
@@ -1588,6 +1618,64 @@ function test_wait_for_clean() {
      teardown $dir || return 1
  }
  
+##
+# Wait until every PG in the cluster is active or peered, or give up if no
+# progress is made for $WAIT_FOR_CLEAN_TIMEOUT seconds.
+# Progress is measured either via the **get_is_making_recovery_progress**
+# predicate or by a change in the number of active or peered PGs (as returned by get_num_active_or_peered).
+# @param cmd optional command, eval'ed once per polling iteration
+# @return 0 if the cluster is peered, 1 otherwise
+#
+function wait_for_peered() {
+    local cmd=$1
+    local num_peered=-1
+    local cur_peered
+    local -a delays=($(get_timeout_delays $WAIT_FOR_CLEAN_TIMEOUT .1))
+    local -i loop=0
+
+    flush_pg_stats || return 1
+    while test $(get_num_pgs) == 0 ; do
+       sleep 1
+    done
+
+    while true ; do
+        # Comparing get_num_active_or_peered & get_num_pgs is used to determine
+        # if the cluster is peered. The count is fetched once per iteration and
+        # reused below to avoid multiple calls of get_num_active_or_peered.
+        cur_peered=$(get_num_active_or_peered)
+        test $cur_peered = $(get_num_pgs) && break
+        if test $cur_peered != $num_peered ; then
+            loop=0
+            num_peered=$cur_peered
+        elif get_is_making_recovery_progress ; then
+            loop=0
+        elif (( $loop >= ${#delays[*]} )) ; then
+            ceph report
+            return 1
+        fi
+        # eval is a no-op if cmd is empty
+        eval $cmd
+        sleep ${delays[$loop]}
+        loop+=1
+    done
+    return 0
+}
+
+function test_wait_for_peered() {
+    local dir=$1
+
+    setup $dir || return 1
+    run_mon $dir a --osd_pool_default_size=2 || return 1
+    run_osd $dir 0 || return 1
+    run_mgr $dir x || return 1
+    create_rbd_pool || return 1
+    ! WAIT_FOR_CLEAN_TIMEOUT=1 wait_for_clean || return 1
+    run_osd $dir 1 || return 1
+    wait_for_peered || return 1
+    teardown $dir || return 1
+}
+
+
  #######################################################################
  
  ##