using ceph::Formatter;
using std::defer_lock;
+using std::scoped_lock;
+using std::shared_lock;
+using std::unique_lock;
using ceph::real_time;
using ceph::real_clock;
// Populate Op::target
OSDSession *s = NULL;
- _calc_target(&info->target, nullptr);
+ int r = _calc_target(&info->target, nullptr);
+ switch (r) {
+ case RECALC_OP_TARGET_POOL_EIO:
+ _check_linger_pool_eio(info);
+ return;
+ }
// Create LingerOp<->OSDSession relation
- int r = _get_session(info->target.osd, &s, sul);
+ r = _get_session(info->target.osd, &s, sul);
ceph_assert(r == 0);
unique_lock sl(s->lock);
_session_linger_op_assign(s, info);
unregister_lingers.push_back(op);
}
break;
+ case RECALC_OP_TARGET_POOL_EIO:
+ _check_linger_pool_eio(op);
+ ldout(cct, 10) << " need to unregister linger op "
+ << op->linger_id << dendl;
+ op->get();
+ unregister_lingers.push_back(op);
+ break;
}
}
case RECALC_OP_TARGET_POOL_DNE:
_check_op_pool_dne(op, &sl);
break;
+ case RECALC_OP_TARGET_POOL_EIO:
+ _check_op_pool_eio(op, &sl);
+ break;
}
}
}
}
+// Fail an op because its pool reports EIO, then retire the op.
+//
+// If the op still has a completion, the in-flight counter is decremented and
+// the completion is fired with osdc_errc::pool_eio / -EIO.  The op is then
+// passed to _finish_op().  When the op is attached to a session, *sl must
+// wrap that session's lock; it is acquired for the _finish_op() call if the
+// caller did not already hold it, and released again afterwards so the
+// caller sees the same lock state it passed in.
+//
+// sl may be unlocked.
+void Objecter::_check_op_pool_eio(Op *op, std::unique_lock<std::shared_mutex> *sl)
+{
+  // rwlock is locked unique
+
+  // we had a new enough map
+  ldout(cct, 10) << "check_op_pool_eio tid " << op->tid
+                 << " concluding pool " << op->target.base_pgid.pool()
+                 << " has eio" << dendl;
+  if (op->has_completion()) {
+    num_in_flight--;
+    op->complete(osdc_errc::pool_eio, -EIO);
+  }
+
+  OSDSession *session = op->session;
+  if (!session) {
+    // no session: nothing to lock, sl is not consulted at all
+    _finish_op(op, 0);
+    return;
+  }
+
+  // The supplied lock object has to belong to this op's session.
+  ceph_assert(sl->mutex() == &session->lock);
+
+  // Only take (and later drop) the session lock if the caller did not
+  // already hold it.
+  const bool caller_held_lock = sl->owns_lock();
+  if (!caller_held_lock) {
+    sl->lock();
+  }
+  _finish_op(op, 0);
+  if (!caller_held_lock) {
+    sl->unlock();
+  }
+}
+
void Objecter::_send_op_map_check(Op *op)
{
// rwlock is locked unique
}
}
+// Fail the pending callbacks of a linger op whose target pool reports EIO.
+//
+// Invoked when _calc_target() returned RECALC_OP_TARGET_POOL_EIO for the
+// linger op.  With op->watch_lock held exclusively, each callback that is
+// still set (on_reg_commit, on_notify_finish) is handed to defer() with
+// osdc_errc::pool_eio and an empty buffer list, and the member is cleared so
+// it cannot be fired a second time.
+//
+// The error reported is pool_eio (not pool_dne): the pool exists but is
+// flagged EIO, which matches what _check_op_pool_eio() reports for plain ops.
+void Objecter::_check_linger_pool_eio(LingerOp *op)
+{
+  // rwlock is locked unique
+
+  std::unique_lock wl{op->watch_lock};
+  if (op->on_reg_commit) {
+    op->on_reg_commit->defer(std::move(op->on_reg_commit),
+                             osdc_errc::pool_eio, cb::list{});
+    op->on_reg_commit = nullptr;
+  }
+  if (op->on_notify_finish) {
+    op->on_notify_finish->defer(std::move(op->on_notify_finish),
+                                osdc_errc::pool_eio, cb::list{});
+    op->on_notify_finish = nullptr;
+  }
+}
+
void Objecter::_send_linger_map_check(LingerOp *op)
{
// ask the monitor
ceph_assert(op->session == NULL);
OSDSession *s = NULL;
- bool check_for_latest_map = _calc_target(&op->target, nullptr)
- == RECALC_OP_TARGET_POOL_DNE;
+ bool check_for_latest_map = false;
+ int r = _calc_target(&op->target, nullptr);
+ switch(r) {
+ case RECALC_OP_TARGET_POOL_DNE:
+ check_for_latest_map = true;
+ break;
+ case RECALC_OP_TARGET_POOL_EIO:
+ if (op->has_completion()) {
+ op->complete(osdc_errc::pool_eio, -EIO);
+ }
+ return;
+ }
// Try to get a session, including a retry if we need to take write lock
- int r = _get_session(op->target.osd, &s, sul);
+ r = _get_session(op->target.osd, &s, sul);
if (r == -EAGAIN ||
(check_for_latest_map && sul.owns_lock_shared()) ||
cct->_conf->objecter_debug_inject_relock_delay) {
t->osd = -1;
return RECALC_OP_TARGET_POOL_DNE;
}
+
+ if (pi->has_flag(pg_pool_t::FLAG_EIO)) {
+ return RECALC_OP_TARGET_POOL_EIO;
+ }
+
ldout(cct,30) << __func__ << " base pi " << pi
<< " pg_num " << pi->get_pg_num() << dendl;
vector<int> up, acting;
ps_t actual_ps = ceph_stable_mod(pgid.ps(), pg_num, pg_num_mask);
pg_t actual_pgid(actual_ps, pgid.pool());
- pg_mapping_t pg_mapping;
- pg_mapping.epoch = osdmap->get_epoch();
- if (lookup_pg_mapping(actual_pgid, &pg_mapping)) {
- up = pg_mapping.up;
- up_primary = pg_mapping.up_primary;
- acting = pg_mapping.acting;
- acting_primary = pg_mapping.acting_primary;
- } else {
+ if (!lookup_pg_mapping(actual_pgid, osdmap->get_epoch(), &up, &up_primary,
+ &acting, &acting_primary)) {
osdmap->pg_to_up_acting_osds(actual_pgid, &up, &up_primary,
&acting, &acting_primary);
pg_mapping_t pg_mapping(osdmap->get_epoch(),
if (legacy_change || split_or_merge || force_resend) {
t->pgid = pgid;
- t->acting = acting;
+ t->acting = std::move(acting);
t->acting_primary = acting_primary;
t->up_primary = up_primary;
- t->up = up;
+ t->up = std::move(up);
t->size = size;
t->min_size = min_size;
t->pg_num = pg_num;
t->pg_num_pending = pg_num_pending;
spg_t spgid(actual_pgid);
if (pi->is_erasure()) {
- for (uint8_t i = 0; i < acting.size(); ++i) {
- if (acting[i] == acting_primary) {
+ for (uint8_t i = 0; i < t->acting.size(); ++i) {
+ if (t->acting[i] == acting_primary) {
spgid.reset_shard(shard_id_t(i));
break;
}
t->peering_crush_mandatory_member = pi->peering_crush_mandatory_member;
ldout(cct, 10) << __func__ << " "
<< " raw pgid " << pgid << " -> actual " << t->actual_pgid
- << " acting " << acting
+ << " acting " << t->acting
<< " primary " << acting_primary << dendl;
t->used_replica = false;
if ((t->flags & (CEPH_OSD_FLAG_BALANCE_READS |
CEPH_OSD_FLAG_LOCALIZE_READS)) &&
- !is_write && pi->is_replicated() && acting.size() > 1) {
+ !is_write && pi->is_replicated() && t->acting.size() > 1) {
int osd;
- ceph_assert(is_read && acting[0] == acting_primary);
+ ceph_assert(is_read && t->acting[0] == acting_primary);
if (t->flags & CEPH_OSD_FLAG_BALANCE_READS) {
- int p = rand() % acting.size();
+ int p = rand() % t->acting.size();
if (p)
t->used_replica = true;
- osd = acting[p];
- ldout(cct, 10) << " chose random osd." << osd << " of " << acting
+ osd = t->acting[p];
+ ldout(cct, 10) << " chose random osd." << osd << " of " << t->acting
<< dendl;
} else {
// look for a local replica. prefer the primary if the
// distance is the same.
int best = -1;
int best_locality = 0;
- for (unsigned i = 0; i < acting.size(); ++i) {
+ for (unsigned i = 0; i < t->acting.size(); ++i) {
int locality = osdmap->crush->get_common_ancestor_distance(
- cct, acting[i], crush_location);
+ cct, t->acting[i], crush_location);
ldout(cct, 20) << __func__ << " localize: rank " << i
- << " osd." << acting[i]
+ << " osd." << t->acting[i]
<< " locality " << locality << dendl;
if (i == 0 ||
(locality >= 0 && best_locality >= 0 &&
}
}
ceph_assert(best >= 0);
- osd = acting[best];
+ osd = t->acting[best];
}
t->osd = osd;
} else {
int flags = op->target.flags;
flags |= CEPH_OSD_FLAG_KNOWN_REDIR;
+ flags |= CEPH_OSD_FLAG_SUPPORTSPOOLEIO;
// Nothing checks this any longer, but needed for compatibility with
// pre-luminous osds
delete op;
}
-void Objecter::get_fs_stats(boost::optional<int64_t> poolid,
+void Objecter::get_fs_stats(std::optional<int64_t> poolid,
decltype(StatfsOp::onfinish)&& onfinish)
{
ldout(cct, 10) << "get_fs_stats" << dendl;