]> git.proxmox.com Git - ceph.git/blobdiff - ceph/src/mds/RecoveryQueue.cc
update sources to v12.2.3
[ceph.git] / ceph / src / mds / RecoveryQueue.cc
index ed7d1096bc374b43066c585a05ace4749c16c083..d205c6f278b9196901e5b8c1b8ea5a261b46927b 100644 (file)
@@ -47,9 +47,11 @@ public:
 };
 
 
-RecoveryQueue::RecoveryQueue(MDSRank *mds_)
-  : mds(mds_), logger(NULL), filer(mds_->objecter, mds_->finisher)
-{}
+RecoveryQueue::RecoveryQueue(MDSRank *mds_) :
+  file_recover_queue(member_offset(CInode, item_dirty_dirfrag_dir)),
+  file_recover_queue_front(member_offset(CInode, item_dirty_dirfrag_nest)),
+  mds(mds_), logger(NULL), filer(mds_->objecter, mds_->finisher)
+{ }
 
 
 /**
@@ -58,19 +60,20 @@ RecoveryQueue::RecoveryQueue(MDSRank *mds_)
  */
 void RecoveryQueue::advance()
 {
-  dout(10) << file_recover_queue.size() << " queued, "
-          << file_recover_queue_front.size() << " prioritized, "
+  dout(10) << file_recover_queue_size << " queued, "
+          << file_recover_queue_front_size << " prioritized, "
           << file_recovering.size() << " recovering" << dendl;
 
   while (file_recovering.size() < g_conf->mds_max_file_recover) {
     if (!file_recover_queue_front.empty()) {
-      CInode *in = *file_recover_queue_front.begin();
-      file_recover_queue_front.erase(file_recover_queue_front.begin());
-      file_recover_queue.erase(in);
+      CInode *in = file_recover_queue_front.front();
+      in->item_recover_queue_front.remove_myself();
+      file_recover_queue_front_size--;
       _start(in);
     } else if (!file_recover_queue.empty()) {
-      CInode *in = *file_recover_queue.begin();
-      file_recover_queue.erase(file_recover_queue.begin());
+      CInode *in = file_recover_queue.front();
+      in->item_recover_queue.remove_myself();
+      file_recover_queue_size--;
       _start(in);
     } else {
       break;
@@ -78,8 +81,8 @@ void RecoveryQueue::advance()
   }
 
   logger->set(l_mdc_num_recovering_processing, file_recovering.size());
-  logger->set(l_mdc_num_recovering_enqueued, file_recover_queue.size());
-  logger->set(l_mdc_num_recovering_prioritized, file_recover_queue_front.size());
+  logger->set(l_mdc_num_recovering_enqueued, file_recover_queue_size + file_recover_queue_front_size);
+  logger->set(l_mdc_num_recovering_prioritized, file_recover_queue_front_size);
 }
 
 void RecoveryQueue::_start(CInode *in)
@@ -92,20 +95,28 @@ void RecoveryQueue::_start(CInode *in)
                      << " on ino " << pi->ino;
   }
 
+  auto p = file_recovering.find(in);
   if (pi->client_ranges.size() && pi->get_max_size()) {
     dout(10) << "starting " << in->inode.size << " " << pi->client_ranges
             << " " << *in << dendl;
-    file_recovering.insert(in);
+    if (p == file_recovering.end()) {
+      file_recovering.insert(make_pair(in, false));
 
-    C_MDC_Recover *fin = new C_MDC_Recover(this, in);
-    filer.probe(in->inode.ino, &in->inode.layout, in->last,
-               pi->get_max_size(), &fin->size, &fin->mtime, false,
-               0, fin);
+      C_MDC_Recover *fin = new C_MDC_Recover(this, in);
+      filer.probe(in->inode.ino, &in->inode.layout, in->last,
+                 pi->get_max_size(), &fin->size, &fin->mtime, false,
+                 0, fin);
+    } else {
+      p->second = true;
+      dout(10) << "already working on " << *in << ", set need_restart flag" << dendl;
+    }
   } else {
     dout(10) << "skipping " << in->inode.size << " " << *in << dendl;
-    in->state_clear(CInode::STATE_RECOVERING);
-    mds->locker->eval(in, CEPH_LOCK_IFILE);
-    in->auth_unpin(this);
+    if (p == file_recovering.end()) {
+      in->state_clear(CInode::STATE_RECOVERING);
+      mds->locker->eval(in, CEPH_LOCK_IFILE);
+      in->auth_unpin(this);
+    }
   }
 }
 
@@ -116,16 +127,28 @@ void RecoveryQueue::prioritize(CInode *in)
     return;
   }
 
-  if (file_recover_queue.count(in)) {
+  if (!in->item_recover_queue_front.is_on_list()) {
     dout(20) << *in << dendl;
-    file_recover_queue_front.insert(in);
-    logger->set(l_mdc_num_recovering_prioritized, file_recover_queue_front.size());
+
+    assert(in->item_recover_queue.is_on_list());
+    in->item_recover_queue.remove_myself();
+    file_recover_queue_size--;
+
+    file_recover_queue_front.push_back(&in->item_recover_queue_front);
+
+    file_recover_queue_front_size++;
+    logger->set(l_mdc_num_recovering_prioritized, file_recover_queue_front_size);
     return;
   }
 
   dout(10) << "not queued " << *in << dendl;
 }
 
+static bool _is_in_any_recover_queue(CInode *in)
+{
+  return in->item_recover_queue.is_on_list() ||
+        in->item_recover_queue_front.is_on_list();
+}
 
 /**
  * Given an authoritative inode which is in the cache,
@@ -143,8 +166,12 @@ void RecoveryQueue::enqueue(CInode *in)
     in->auth_pin(this);
     logger->inc(l_mdc_recovery_started);
   }
-  file_recover_queue.insert(in);
-  logger->set(l_mdc_num_recovering_enqueued, file_recover_queue.size());
+
+  if (!_is_in_any_recover_queue(in)) {
+    file_recover_queue.push_back(&in->item_recover_queue);
+    file_recover_queue_size++;
+    logger->set(l_mdc_num_recovering_enqueued, file_recover_queue_size + file_recover_queue_front_size);
+  }
 }
 
 
@@ -173,16 +200,28 @@ void RecoveryQueue::_recovered(CInode *in, int r, uint64_t size, utime_t mtime)
     }
   }
 
-  file_recovering.erase(in);
+  auto p = file_recovering.find(in);
+  assert(p != file_recovering.end());
+  bool restart = p->second;
+  file_recovering.erase(p);
+
   logger->set(l_mdc_num_recovering_processing, file_recovering.size());
   logger->inc(l_mdc_recovery_completed);
   in->state_clear(CInode::STATE_RECOVERING);
 
-  if (!in->get_parent_dn() && !in->get_projected_parent_dn()) {
-    dout(10) << " inode has no parents, killing it off" << dendl;
-    in->auth_unpin(this);
-    mds->mdcache->remove_inode(in);
-  } else {
+  if (restart) {
+    if (in->item_recover_queue.is_on_list()) {
+      in->item_recover_queue.remove_myself();
+      file_recover_queue_size--;
+    }
+    if (in->item_recover_queue_front.is_on_list()) {
+      in->item_recover_queue_front.remove_myself();
+      file_recover_queue_front_size--;
+    }
+    logger->set(l_mdc_num_recovering_enqueued, file_recover_queue_size + file_recover_queue_front_size);
+    logger->set(l_mdc_num_recovering_prioritized, file_recover_queue_front_size);
+    _start(in);
+  } else if (!_is_in_any_recover_queue(in)) {
     // journal
     mds->locker->check_inode_max_size(in, true, 0,  size, mtime);
     mds->locker->eval(in, CEPH_LOCK_IFILE);