]> git.proxmox.com Git - ceph.git/blobdiff - ceph/src/osdc/Objecter.cc
import quincy beta 17.1.0
[ceph.git] / ceph / src / osdc / Objecter.cc
index da25c584f797421c69f4922746d3cf296719cf08..0050489d663de965318bbb1779795d424951f425 100644 (file)
@@ -75,6 +75,9 @@ using ceph::encode;
 using ceph::Formatter;
 
 using std::defer_lock;
+using std::scoped_lock;
+using std::shared_lock;
+using std::unique_lock;
 
 using ceph::real_time;
 using ceph::real_clock;
@@ -868,10 +871,15 @@ void Objecter::_linger_submit(LingerOp *info,
 
   // Populate Op::target
   OSDSession *s = NULL;
-  _calc_target(&info->target, nullptr);
+  int r = _calc_target(&info->target, nullptr);
+  switch (r) {
+  case RECALC_OP_TARGET_POOL_EIO:
+    _check_linger_pool_eio(info);
+    return;
+  }
 
   // Create LingerOp<->OSDSession relation
-  int r = _get_session(info->target.osd, &s, sul);
+  r = _get_session(info->target.osd, &s, sul);
   ceph_assert(r == 0);
   unique_lock sl(s->lock);
   _session_linger_op_assign(s, info);
@@ -1063,6 +1071,13 @@ void Objecter::_scan_requests(
        unregister_lingers.push_back(op);
       }
       break;
+    case RECALC_OP_TARGET_POOL_EIO:
+      _check_linger_pool_eio(op);
+      ldout(cct, 10) << " need to unregister linger op "
+                    << op->linger_id << dendl;
+      op->get();
+      unregister_lingers.push_back(op);
+      break;
     }
   }
 
@@ -1092,6 +1107,9 @@ void Objecter::_scan_requests(
     case RECALC_OP_TARGET_POOL_DNE:
       _check_op_pool_dne(op, &sl);
       break;
+    case RECALC_OP_TARGET_POOL_EIO:
+      _check_op_pool_eio(op, &sl);
+      break;
     }
   }
 
@@ -1562,6 +1580,37 @@ void Objecter::_check_op_pool_dne(Op *op, std::unique_lock<std::shared_mutex> *s
   }
 }
 
+// Fail op with pool_eio and finish it; *sl may or may not be held on entry.
+void Objecter::_check_op_pool_eio(Op *op, std::unique_lock<std::shared_mutex> *sl)
+{
+  // rwlock is locked unique
+  ldout(cct, 10) << "check_op_pool_eio tid " << op->tid
+                << " concluding pool " << op->target.base_pgid.pool()
+                << " has eio" << dendl;
+  // Hand the error to the op's completion, if it has one.
+  if (op->has_completion()) {
+    num_in_flight--;
+    op->complete(osdc_errc::pool_eio, -EIO);
+  }
+
+  OSDSession *const session = op->session;
+  if (!session) {
+    _finish_op(op, 0); // no session
+    return;
+  }
+
+  // Hold the session lock across _finish_op, then restore the caller's state.
+  ceph_assert(sl->mutex() == &session->lock);
+  const bool took_lock = !sl->owns_lock();
+  if (took_lock) {
+    sl->lock();
+  }
+  _finish_op(op, 0);
+  if (took_lock) {
+    sl->unlock();
+  }
+}
+
 void Objecter::_send_op_map_check(Op *op)
 {
   // rwlock is locked unique
@@ -1656,6 +1705,23 @@ void Objecter::_check_linger_pool_dne(LingerOp *op, bool *need_unregister)
   }
 }
 
+// Pool has FLAG_EIO: fail this linger op's pending callbacks with pool_eio.
+void Objecter::_check_linger_pool_eio(LingerOp *op)
+{
+  // rwlock is locked unique; take watch_lock before touching the callbacks
+  std::unique_lock wl{op->watch_lock};
+  if (op->on_reg_commit) {
+    op->on_reg_commit->defer(std::move(op->on_reg_commit),
+                            osdc_errc::pool_eio, cb::list{}); // was pool_dne
+    op->on_reg_commit = nullptr; // explicit reset after handing it off
+  }
+  if (op->on_notify_finish) {
+    op->on_notify_finish->defer(std::move(op->on_notify_finish),
+                               osdc_errc::pool_eio, cb::list{}); // was pool_dne
+    op->on_notify_finish = nullptr; // explicit reset after handing it off
+  }
+}
+
 void Objecter::_send_linger_map_check(LingerOp *op)
 {
   // ask the monitor
@@ -2313,11 +2379,21 @@ void Objecter::_op_submit(Op *op, shunique_lock<ceph::shared_mutex>& sul, ceph_t
   ceph_assert(op->session == NULL);
   OSDSession *s = NULL;
 
-  bool check_for_latest_map = _calc_target(&op->target, nullptr)
-    == RECALC_OP_TARGET_POOL_DNE;
+  bool check_for_latest_map = false;
+  int r = _calc_target(&op->target, nullptr);
+  switch(r) {
+  case RECALC_OP_TARGET_POOL_DNE:
+    check_for_latest_map = true;
+    break;
+  case RECALC_OP_TARGET_POOL_EIO:
+    if (op->has_completion()) {
+      op->complete(osdc_errc::pool_eio, -EIO);
+    }
+    return;
+  }
 
   // Try to get a session, including a retry if we need to take write lock
-  int r = _get_session(op->target.osd, &s, sul);
+  r = _get_session(op->target.osd, &s, sul);
   if (r == -EAGAIN ||
       (check_for_latest_map && sul.owns_lock_shared()) ||
       cct->_conf->objecter_debug_inject_relock_delay) {
@@ -2704,6 +2780,11 @@ int Objecter::_calc_target(op_target_t *t, Connection *con, bool any_change)
     t->osd = -1;
     return RECALC_OP_TARGET_POOL_DNE;
   }
+
+  if (pi->has_flag(pg_pool_t::FLAG_EIO)) {
+    return RECALC_OP_TARGET_POOL_EIO;
+  }
+
   ldout(cct,30) << __func__ << "  base pi " << pi
                << " pg_num " << pi->get_pg_num() << dendl;
 
@@ -2761,14 +2842,8 @@ int Objecter::_calc_target(op_target_t *t, Connection *con, bool any_change)
   vector<int> up, acting;
   ps_t actual_ps = ceph_stable_mod(pgid.ps(), pg_num, pg_num_mask);
   pg_t actual_pgid(actual_ps, pgid.pool());
-  pg_mapping_t pg_mapping;
-  pg_mapping.epoch = osdmap->get_epoch();
-  if (lookup_pg_mapping(actual_pgid, &pg_mapping)) {
-    up = pg_mapping.up;
-    up_primary = pg_mapping.up_primary;
-    acting = pg_mapping.acting;
-    acting_primary = pg_mapping.acting_primary;
-  } else {
+  if (!lookup_pg_mapping(actual_pgid, osdmap->get_epoch(), &up, &up_primary,
+                         &acting, &acting_primary)) {
     osdmap->pg_to_up_acting_osds(actual_pgid, &up, &up_primary,
                                  &acting, &acting_primary);
     pg_mapping_t pg_mapping(osdmap->get_epoch(),
@@ -2838,10 +2913,10 @@ int Objecter::_calc_target(op_target_t *t, Connection *con, bool any_change)
 
   if (legacy_change || split_or_merge || force_resend) {
     t->pgid = pgid;
-    t->acting = acting;
+    t->acting = std::move(acting);
     t->acting_primary = acting_primary;
     t->up_primary = up_primary;
-    t->up = up;
+    t->up = std::move(up);
     t->size = size;
     t->min_size = min_size;
     t->pg_num = pg_num;
@@ -2849,8 +2924,8 @@ int Objecter::_calc_target(op_target_t *t, Connection *con, bool any_change)
     t->pg_num_pending = pg_num_pending;
     spg_t spgid(actual_pgid);
     if (pi->is_erasure()) {
-      for (uint8_t i = 0; i < acting.size(); ++i) {
-        if (acting[i] == acting_primary) {
+      for (uint8_t i = 0; i < t->acting.size(); ++i) {
+        if (t->acting[i] == acting_primary) {
           spgid.reset_shard(shard_id_t(i));
           break;
         }
@@ -2865,31 +2940,31 @@ int Objecter::_calc_target(op_target_t *t, Connection *con, bool any_change)
     t->peering_crush_mandatory_member = pi->peering_crush_mandatory_member;
     ldout(cct, 10) << __func__ << " "
                   << " raw pgid " << pgid << " -> actual " << t->actual_pgid
-                  << " acting " << acting
+                  << " acting " << t->acting
                   << " primary " << acting_primary << dendl;
     t->used_replica = false;
     if ((t->flags & (CEPH_OSD_FLAG_BALANCE_READS |
                      CEPH_OSD_FLAG_LOCALIZE_READS)) &&
-        !is_write && pi->is_replicated() && acting.size() > 1) {
+        !is_write && pi->is_replicated() && t->acting.size() > 1) {
       int osd;
-      ceph_assert(is_read && acting[0] == acting_primary);
+      ceph_assert(is_read && t->acting[0] == acting_primary);
       if (t->flags & CEPH_OSD_FLAG_BALANCE_READS) {
-       int p = rand() % acting.size();
+       int p = rand() % t->acting.size();
        if (p)
          t->used_replica = true;
-       osd = acting[p];
-       ldout(cct, 10) << " chose random osd." << osd << " of " << acting
+       osd = t->acting[p];
+       ldout(cct, 10) << " chose random osd." << osd << " of " << t->acting
                       << dendl;
       } else {
        // look for a local replica.  prefer the primary if the
        // distance is the same.
        int best = -1;
        int best_locality = 0;
-       for (unsigned i = 0; i < acting.size(); ++i) {
+       for (unsigned i = 0; i < t->acting.size(); ++i) {
          int locality = osdmap->crush->get_common_ancestor_distance(
-                cct, acting[i], crush_location);
+                cct, t->acting[i], crush_location);
          ldout(cct, 20) << __func__ << " localize: rank " << i
-                        << " osd." << acting[i]
+                        << " osd." << t->acting[i]
                         << " locality " << locality << dendl;
          if (i == 0 ||
              (locality >= 0 && best_locality >= 0 &&
@@ -2902,7 +2977,7 @@ int Objecter::_calc_target(op_target_t *t, Connection *con, bool any_change)
          }
        }
        ceph_assert(best >= 0);
-       osd = acting[best];
+       osd = t->acting[best];
       }
       t->osd = osd;
     } else {
@@ -3106,6 +3181,7 @@ Objecter::MOSDOp *Objecter::_prepare_osd_op(Op *op)
 
   int flags = op->target.flags;
   flags |= CEPH_OSD_FLAG_KNOWN_REDIR;
+  flags |= CEPH_OSD_FLAG_SUPPORTSPOOLEIO;
 
   // Nothing checks this any longer, but needed for compatibility with
   // pre-luminous osds
@@ -4211,7 +4287,7 @@ void Objecter::_finish_pool_stat_op(PoolStatOp *op, int r)
   delete op;
 }
 
-void Objecter::get_fs_stats(boost::optional<int64_t> poolid,
+void Objecter::get_fs_stats(std::optional<int64_t> poolid,
                            decltype(StatfsOp::onfinish)&& onfinish)
 {
   ldout(cct, 10) << "get_fs_stats" << dendl;