X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=ceph%2Fqa%2Ftasks%2Fceph_manager.py;h=2516043217b7d63cdb0085c239e10257f652c571;hb=f6b5b4d738b87d88d2de35127b6b0e41eae2a272;hp=c058735aa8a27d9aee0b0a5869e62ee0f37f1e30;hpb=12732ca2e80d168d344a265acffc1fbd1fa1f1b5;p=ceph.git diff --git a/ceph/qa/tasks/ceph_manager.py b/ceph/qa/tasks/ceph_manager.py index c058735aa..251604321 100644 --- a/ceph/qa/tasks/ceph_manager.py +++ b/ceph/qa/tasks/ceph_manager.py @@ -2467,6 +2467,36 @@ class CephManager: pgs = self.get_pg_stats() return self._get_num_active_down(pgs) == len(pgs) + def dump_pgs_not_active_clean(self): + """ + Dumps all pgs that are not active+clean + """ + pgs = self.get_pg_stats() + for pg in pgs: + if pg['state'] != 'active+clean': + self.log('PG %s is not active+clean' % pg['pgid']) + self.log(pg) + + def dump_pgs_not_active_down(self): + """ + Dumps all pgs that are not active or down + """ + pgs = self.get_pg_stats() + for pg in pgs: + if 'active' not in pg['state'] and 'down' not in pg['state']: + self.log('PG %s is not active or down' % pg['pgid']) + self.log(pg) + + def dump_pgs_not_active(self): + """ + Dumps all pgs that are not active + """ + pgs = self.get_pg_stats() + for pg in pgs: + if 'active' not in pg['state']: + self.log('PG %s is not active' % pg['pgid']) + self.log(pg) + def wait_for_clean(self, timeout=1200): """ Returns true when all pgs are clean. @@ -2482,11 +2512,10 @@ class CephManager: else: self.log("no progress seen, keeping timeout for now") if time.time() - start >= timeout: - self.log('dumping pgs') - out = self.raw_cluster_cmd('pg', 'dump') - self.log(out) + self.log('dumping pgs not clean') + self.dump_pgs_not_active_clean() assert time.time() - start < timeout, \ - 'failed to become clean before timeout expired' + 'wait_for_clean: failed before timeout expired' cur_active_clean = self.get_num_active_clean() if cur_active_clean != num_active_clean: start = time.time() @@ -2568,11 +2597,10 @@ class CephManager: if now - start >= timeout: if self.is_recovered(): break - self.log('dumping pgs') - out = self.raw_cluster_cmd('pg', 'dump') - self.log(out) + self.log('dumping pgs not recovered yet') + self.dump_pgs_not_active_clean() assert now - start < timeout, \ - 'failed to recover before timeout expired' + 'wait_for_recovery: failed before timeout expired' cur_active_recovered = self.get_num_active_recovered() if cur_active_recovered != num_active_recovered: start = time.time() @@ -2590,11 +2618,10 @@ class CephManager: while not self.is_active(): if timeout is not None: if time.time() - start >= timeout: - self.log('dumping pgs') - out = self.raw_cluster_cmd('pg', 'dump') - self.log(out) + self.log('dumping pgs not active') + self.dump_pgs_not_active() assert time.time() - start < timeout, \ - 'failed to recover before timeout expired' + 'wait_for_active: failed before timeout expired' cur_active = self.get_num_active() if cur_active != num_active: start = time.time() @@ -2613,11 +2640,10 @@ class CephManager: while not self.is_active_or_down(): if timeout is not None: if time.time() - start >= timeout: - self.log('dumping pgs') - out = self.raw_cluster_cmd('pg', 'dump') - self.log(out) + self.log('dumping pgs not active or down') + self.dump_pgs_not_active_down() assert time.time() - start < timeout, \ - 'failed to recover before timeout expired' + 'wait_for_active_or_down: failed before timeout expired' cur_active_down = self.get_num_active_down() if cur_active_down != num_active_down: start = time.time() @@ -2667,11 +2693,10 @@ class CephManager: while not self.is_active(): if timeout is not None: if time.time() - start >= timeout: - self.log('dumping pgs') - out = self.raw_cluster_cmd('pg', 'dump') - self.log(out) + self.log('dumping pgs not active') + self.dump_pgs_not_active() assert time.time() - start < timeout, \ - 'failed to become active before timeout expired' + 'wait_till_active: failed before timeout expired' time.sleep(3) self.log("active!")