X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=ceph%2Fsrc%2Fcommon%2FTrackedOp.cc;h=49a7a988935c3c692cb40d2e53a0ef982e5378a0;hb=92f5a8d42d07f9929ae4fa7e01342fe8d96808a8;hp=975e58a226013327a8a35d3b859f6e284a328aa5;hpb=a0324939f9d0e1905d5df8f57442f09dc70af83d;p=ceph.git diff --git a/ceph/src/common/TrackedOp.cc b/ceph/src/common/TrackedOp.cc index 975e58a22..49a7a9889 100644 --- a/ceph/src/common/TrackedOp.cc +++ b/ceph/src/common/TrackedOp.cc @@ -78,7 +78,7 @@ void OpHistory::_insert_delayed(const utime_t& now, TrackedOpRef op) double opduration = op->get_duration(); duration.insert(make_pair(opduration, op)); arrived.insert(make_pair(op->get_initiated(), op)); - if (opduration >= history_slow_op_threshold) + if (opduration >= history_slow_op_threshold.load()) slow_op.insert(make_pair(op->get_initiated(), op)); cleanup(now); } @@ -87,21 +87,21 @@ void OpHistory::cleanup(utime_t now) { while (arrived.size() && (now - arrived.begin()->first > - (double)(history_duration))) { + (double)(history_duration.load()))) { duration.erase(make_pair( arrived.begin()->second->get_duration(), arrived.begin()->second)); arrived.erase(arrived.begin()); } - while (duration.size() > history_size) { + while (duration.size() > history_size.load()) { arrived.erase(make_pair( duration.begin()->second->get_initiated(), duration.begin()->second)); duration.erase(duration.begin()); } - while (slow_op.size() > history_slow_op_size) { + while (slow_op.size() > history_slow_op_size.load()) { slow_op.erase(make_pair( slow_op.begin()->second->get_initiated(), slow_op.begin()->second)); @@ -113,8 +113,8 @@ void OpHistory::dump_ops(utime_t now, Formatter *f, set filters, bool by std::lock_guard history_lock(ops_history_lock); cleanup(now); f->open_object_section("op_history"); - f->dump_int("size", history_size); - f->dump_int("duration", history_duration); + f->dump_int("size", history_size.load()); + f->dump_int("duration", history_duration.load()); { f->open_array_section("ops"); auto dump_fn = [&f, &now, &filters](auto begin_iter, auto end_iter) { @@ -182,8 +182,8 @@ void OpHistory::dump_slow_ops(utime_t now, Formatter *f, set filters) std::lock_guard history_lock(ops_history_lock); cleanup(now); f->open_object_section("OpHistory slow ops"); - f->dump_int("num to keep", history_slow_op_size); - f->dump_int("threshold to keep", history_slow_op_threshold); + f->dump_int("num to keep", history_slow_op_size.load()); + f->dump_int("threshold to keep", history_slow_op_threshold.load()); { f->open_array_section("Ops"); for (set >::const_iterator i = @@ -294,7 +294,15 @@ bool OpTracker::visit_ops_in_flight(utime_t* oldest_secs, const utime_t now = ceph_clock_now(); utime_t oldest_op = now; - uint64_t total_ops_in_flight = 0; + // single representation of all inflight operations reunified + // from OpTracker's shards. TrackedOpRef extends the lifetime + // to carry the ops outside of the critical section, and thus + // allows to call the visitor without any lock being held. + // This simplifies the contract on API at the price of plenty + // additional moves and atomic ref-counting. This seems OK as + // `visit_ops_in_flight()` is definitely not intended for any + // hot path. + std::vector ops_in_flight; RWLock::RLocker l(lock); for (const auto sdata : sharded_in_flight_list) { @@ -307,26 +315,29 @@ bool OpTracker::visit_ops_in_flight(utime_t* oldest_secs, oldest_op = oldest_op_tmp; } } - total_ops_in_flight += sdata->ops_in_flight_sharded.size(); + std::transform(std::begin(sdata->ops_in_flight_sharded), + std::end(sdata->ops_in_flight_sharded), + std::back_inserter(ops_in_flight), + [] (TrackedOp& op) { return TrackedOpRef(&op); }); } - if (!total_ops_in_flight) + if (ops_in_flight.empty()) return false; *oldest_secs = now - oldest_op; - dout(10) << "ops_in_flight.size: " << total_ops_in_flight + dout(10) << "ops_in_flight.size: " << ops_in_flight.size() << "; oldest is " << *oldest_secs << " seconds old" << dendl; if (*oldest_secs < complaint_time) return false; - for (uint32_t iter = 0; iter < num_optracker_shards; iter++) { - ShardedTrackingData* sdata = sharded_in_flight_list[iter]; - ceph_assert(NULL != sdata); - std::lock_guard locker(sdata->ops_in_flight_lock_sharded); - for (auto& op : sdata->ops_in_flight_sharded) { - if (!visit(op)) - break; - } + l.unlock(); + for (auto& op : ops_in_flight) { + // `lock` neither `ops_in_flight_lock_sharded` should be held when + // calling the visitor. Otherwise `OSD::get_health_metrics()` can + // dead-lock due to the `~TrackedOp()` calling `record_history_op()` + // or `unregister_inflight_op()`. + if (!visit(*op)) + break; } return true; }