double opduration = op->get_duration();
duration.insert(make_pair(opduration, op));
arrived.insert(make_pair(op->get_initiated(), op));
- if (opduration >= history_slow_op_threshold)
+ if (opduration >= history_slow_op_threshold.load())
slow_op.insert(make_pair(op->get_initiated(), op));
cleanup(now);
}
{
while (arrived.size() &&
(now - arrived.begin()->first >
- (double)(history_duration))) {
+ (double)(history_duration.load()))) {
duration.erase(make_pair(
arrived.begin()->second->get_duration(),
arrived.begin()->second));
arrived.erase(arrived.begin());
}
- while (duration.size() > history_size) {
+ while (duration.size() > history_size.load()) {
arrived.erase(make_pair(
duration.begin()->second->get_initiated(),
duration.begin()->second));
duration.erase(duration.begin());
}
- while (slow_op.size() > history_slow_op_size) {
+ while (slow_op.size() > history_slow_op_size.load()) {
slow_op.erase(make_pair(
slow_op.begin()->second->get_initiated(),
slow_op.begin()->second));
std::lock_guard history_lock(ops_history_lock);
cleanup(now);
f->open_object_section("op_history");
- f->dump_int("size", history_size);
- f->dump_int("duration", history_duration);
+ f->dump_int("size", history_size.load());
+ f->dump_int("duration", history_duration.load());
{
f->open_array_section("ops");
auto dump_fn = [&f, &now, &filters](auto begin_iter, auto end_iter) {
std::lock_guard history_lock(ops_history_lock);
cleanup(now);
f->open_object_section("OpHistory slow ops");
- f->dump_int("num to keep", history_slow_op_size);
- f->dump_int("threshold to keep", history_slow_op_threshold);
+ f->dump_int("num to keep", history_slow_op_size.load());
+ f->dump_int("threshold to keep", history_slow_op_threshold.load());
{
f->open_array_section("Ops");
for (set<pair<utime_t, TrackedOpRef> >::const_iterator i =
const utime_t now = ceph_clock_now();
utime_t oldest_op = now;
- uint64_t total_ops_in_flight = 0;
+ // single representation of all inflight operations reunified
+ // from OpTracker's shards. TrackedOpRef extends the lifetime
+ // to carry the ops outside of the critical section, and thus
+ // allows to call the visitor without any lock being held.
+ // This simplifies the contract on API at the price of plenty
+ // additional moves and atomic ref-counting. This seems OK as
+ // `visit_ops_in_flight()` is definitely not intended for any
+ // hot path.
+ std::vector<TrackedOpRef> ops_in_flight;
RWLock::RLocker l(lock);
for (const auto sdata : sharded_in_flight_list) {
oldest_op = oldest_op_tmp;
}
}
- total_ops_in_flight += sdata->ops_in_flight_sharded.size();
+ std::transform(std::begin(sdata->ops_in_flight_sharded),
+ std::end(sdata->ops_in_flight_sharded),
+ std::back_inserter(ops_in_flight),
+ [] (TrackedOp& op) { return TrackedOpRef(&op); });
}
- if (!total_ops_in_flight)
+ if (ops_in_flight.empty())
return false;
*oldest_secs = now - oldest_op;
- dout(10) << "ops_in_flight.size: " << total_ops_in_flight
+ dout(10) << "ops_in_flight.size: " << ops_in_flight.size()
<< "; oldest is " << *oldest_secs
<< " seconds old" << dendl;
if (*oldest_secs < complaint_time)
return false;
- for (uint32_t iter = 0; iter < num_optracker_shards; iter++) {
- ShardedTrackingData* sdata = sharded_in_flight_list[iter];
- ceph_assert(NULL != sdata);
- std::lock_guard locker(sdata->ops_in_flight_lock_sharded);
- for (auto& op : sdata->ops_in_flight_sharded) {
- if (!visit(op))
- break;
- }
+ l.unlock();
+ for (auto& op : ops_in_flight) {
+ // `lock` neither `ops_in_flight_lock_sharded` should be held when
+ // calling the visitor. Otherwise `OSD::get_health_metrics()` can
+ // dead-lock due to the `~TrackedOp()` calling `record_history_op()`
+ // or `unregister_inflight_op()`.
+ if (!visit(*op))
+ break;
}
return true;
}