// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
 * Ceph - scalable distributed file system
 *
 * Copyright (C) 2015 Red Hat
 *
 * This is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License version 2.1, as published by the Free Software
 * Foundation.  See file COPYING.
 *
 */

#include "common/debug.h"
#include "mds/mdstypes.h"
#include "mds/CInode.h"
#include "mds/MDCache.h"

#include "PurgeQueue.h"

#define dout_context cct
#define dout_subsys ceph_subsys_mds
#undef dout_prefix
#define dout_prefix _prefix(_dout, rank) << __func__ << ": "

static ostream& _prefix(std::ostream *_dout, mds_rank_t rank) {
  return *_dout << "mds." << rank << ".purge_queue ";
}

const std::map<std::string, PurgeItem::Action> PurgeItem::actions = {
  {"NONE", PurgeItem::NONE},
  {"PURGE_FILE", PurgeItem::PURGE_FILE},
  {"TRUNCATE_FILE", PurgeItem::TRUNCATE_FILE},
  {"PURGE_DIR", PurgeItem::PURGE_DIR}
};

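// For illustration (not part of the upstream file): the map enables
// name-based lookup, e.g. when parsing an action name from tooling input:
//   auto it = PurgeItem::actions.find("PURGE_FILE");
//   if (it != PurgeItem::actions.end())
//     PurgeItem::Action a = it->second;  // PurgeItem::PURGE_FILE
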
void PurgeItem::encode(bufferlist &bl) const
{
  ENCODE_START(2, 1, bl);
  encode((uint8_t)action, bl);
  encode(ino, bl);
  encode(size, bl);
  encode(layout, bl, CEPH_FEATURE_FS_FILE_LAYOUT_V2);
  encode(old_pools, bl);
  encode(snapc, bl);
  encode(fragtree, bl);
  encode(stamp, bl);
  uint8_t static const pad = 0xff;
  for (unsigned int i = 0; i < pad_size; i++) {
    encode(pad, bl);
  }
  ENCODE_FINISH(bl);
}

void PurgeItem::decode(bufferlist::const_iterator &p)
{
  DECODE_START(2, p);
  bool done = false;
  if (struct_v == 1) {
    auto p_start = p;
    try {
      // bad encoding introduced by v13.2.2
      decode(stamp, p);
      decode(pad_size, p);
      p += pad_size;
      uint8_t raw_action;
      decode(raw_action, p);
      action = (Action)raw_action;
      decode(ino, p);
      decode(size, p);
      decode(layout, p);
      decode(old_pools, p);
      decode(snapc, p);
      decode(fragtree, p);
      if (p.get_off() > struct_end)
        throw buffer::end_of_buffer();
      done = true;
    } catch (const buffer::error &e) {
      p = p_start;
    }
  }
  if (!done) {
    uint8_t raw_action;
    decode(raw_action, p);
    action = (Action)raw_action;
    decode(ino, p);
    decode(size, p);
    decode(layout, p);
    decode(old_pools, p);
    decode(snapc, p);
    decode(fragtree, p);
    if (struct_v >= 2) {
      decode(stamp, p);
    }
  }
  DECODE_FINISH(p);
}

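// Note on the compat dance above: v13.2.2 emitted struct_v 1 with the
// (stamp, pad_size, padding) fields *before* the payload, while the correct
// v1 layout begins at `action`. decode() therefore speculatively tries the
// bad layout first, and rewinds `p` to retry with the normal layout if that
// read throws or runs past struct_end.
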
// if Objecter has any slow requests, take that as a hint and
// slow down our rate of purging
PurgeQueue::PurgeQueue(
      CephContext *cct_,
      mds_rank_t rank_,
      const int64_t metadata_pool_,
      Objecter *objecter_,
      Context *on_error_)
  :
    cct(cct_),
    rank(rank_),
    metadata_pool(metadata_pool_),
    finisher(cct, "PurgeQueue", "PQ_Finisher"),
    timer(cct, lock),
    filer(objecter_, &finisher),
    objecter(objecter_),
    journaler("pq", MDS_INO_PURGE_QUEUE + rank, metadata_pool,
      CEPH_FS_ONDISK_MAGIC, objecter_, nullptr, 0,
      &finisher),
    on_error(on_error_)
{
  ceph_assert(cct != nullptr);
  ceph_assert(on_error != nullptr);
  ceph_assert(objecter != nullptr);
  journaler.set_write_error_handler(on_error);
}

PurgeQueue::~PurgeQueue()
{
  if (logger) {
    g_ceph_context->get_perfcounters_collection()->remove(logger.get());
  }
  delete on_error;
}

void PurgeQueue::create_logger()
{
  PerfCountersBuilder pcb(g_ceph_context, "purge_queue", l_pq_first, l_pq_last);

  pcb.add_u64_counter(l_pq_executed, "pq_executed", "Purge queue tasks executed",
                      "purg", PerfCountersBuilder::PRIO_INTERESTING);

  pcb.set_prio_default(PerfCountersBuilder::PRIO_USEFUL);
  pcb.add_u64(l_pq_executing_ops, "pq_executing_ops", "Purge queue ops in flight");
  pcb.add_u64(l_pq_executing_ops_high_water, "pq_executing_ops_high_water",
              "Maximum number of executing file purge ops");
  pcb.add_u64(l_pq_executing, "pq_executing", "Purge queue tasks in flight");
  pcb.add_u64(l_pq_executing_high_water, "pq_executing_high_water",
              "Maximum number of executing file purges");
  pcb.add_u64(l_pq_item_in_journal, "pq_item_in_journal", "Purge items left in journal");

  logger.reset(pcb.create_perf_counters());
  g_ceph_context->get_perfcounters_collection()->add(logger.get());
}

void PurgeQueue::init()
{
  std::lock_guard l(lock);

  ceph_assert(logger != nullptr);

  finisher.start();
  timer.init();
}

void PurgeQueue::activate()
{
  std::lock_guard l(lock);

  {
    PurgeItem item;
    bufferlist bl;

    // calculate the serialized size of a purge item as stored in the journal;
    // used later to count how many items are left in the journal
    item.encode(bl);
    purge_item_journal_size = bl.length() + journaler.get_journal_envelope_size();
  }

  if (readonly) {
    dout(10) << "skipping activate: PurgeQueue is readonly" << dendl;
    return;
  }

  if (journaler.get_read_pos() == journaler.get_write_pos())
    return;

  if (in_flight.empty()) {
    dout(4) << "start work (by drain)" << dendl;
    finisher.queue(new LambdaContext([this](int r) {
      std::lock_guard l(lock);
      _consume();
    }));
  }
}

void PurgeQueue::shutdown()
{
  std::lock_guard l(lock);

  journaler.shutdown();
  timer.shutdown();
  finisher.stop();
}

void PurgeQueue::open(Context *completion)
{
  dout(4) << "opening" << dendl;

  std::lock_guard l(lock);

  if (completion)
    waiting_for_recovery.push_back(completion);

  journaler.recover(new LambdaContext([this](int r){
    if (r == -CEPHFS_ENOENT) {
      dout(1) << "Purge Queue not found, assuming this is an upgrade and "
                 "creating it." << dendl;
      create(NULL);
    } else if (r == 0) {
      std::lock_guard l(lock);
      dout(4) << "open complete" << dendl;

      // Journaler only guarantees that entries before the head's write_pos
      // have been fully flushed. Before appending new entries, we need to
      // find and drop any partially written entry.
      if (journaler.last_committed.write_pos < journaler.get_write_pos()) {
        dout(4) << "recovering write_pos" << dendl;
        journaler.set_read_pos(journaler.last_committed.write_pos);
        _recover();
        return;
      }

      journaler.set_writeable();
      recovered = true;
      finish_contexts(g_ceph_context, waiting_for_recovery);
    } else {
      derr << "Error " << r << " loading Journaler" << dendl;
      _go_readonly(r);
    }
  }));
}

void PurgeQueue::wait_for_recovery(Context* c)
{
  std::lock_guard l(lock);
  if (recovered) {
    c->complete(0);
  } else if (readonly) {
    dout(10) << "cannot wait for recovery: PurgeQueue is readonly" << dendl;
    c->complete(-CEPHFS_EROFS);
  } else {
    waiting_for_recovery.push_back(c);
  }
}

void PurgeQueue::_recover()
{
  ceph_assert(ceph_mutex_is_locked_by_me(lock));

  // Journaler::is_readable() adjusts write_pos if a partial entry is encountered
  while (1) {
    if (!journaler.is_readable() &&
        !journaler.get_error() &&
        journaler.get_read_pos() < journaler.get_write_pos()) {
      journaler.wait_for_readable(new LambdaContext([this](int r) {
        std::lock_guard l(lock);
        _recover();
      }));
      return;
    }

    if (journaler.get_error()) {
      int r = journaler.get_error();
      derr << "Error " << r << " recovering write_pos" << dendl;
      _go_readonly(r);
      return;
    }

    if (journaler.get_read_pos() == journaler.get_write_pos()) {
      dout(4) << "write_pos recovered" << dendl;
      // restore original read_pos
      journaler.set_read_pos(journaler.last_committed.expire_pos);
      journaler.set_writeable();
      recovered = true;
      finish_contexts(g_ceph_context, waiting_for_recovery);
      return;
    }

    bufferlist bl;
    bool readable = journaler.try_read_entry(bl);
    ceph_assert(readable);  // we checked earlier
  }
}

void PurgeQueue::create(Context *fin)
{
  dout(4) << "creating" << dendl;
  std::lock_guard l(lock);

  if (fin)
    waiting_for_recovery.push_back(fin);

  file_layout_t layout = file_layout_t::get_default();
  layout.pool_id = metadata_pool;
  journaler.set_writeable();
  journaler.create(&layout, JOURNAL_FORMAT_RESILIENT);
  journaler.write_head(new LambdaContext([this](int r) {
    std::lock_guard l(lock);
    if (r) {
      _go_readonly(r);
    } else {
      recovered = true;
      finish_contexts(g_ceph_context, waiting_for_recovery);
    }
  }));
}

/**
 * The `completion` context will always be called back via a Finisher
 */
void PurgeQueue::push(const PurgeItem &pi, Context *completion)
{
  dout(4) << "pushing inode " << pi.ino << dendl;
  std::lock_guard l(lock);

  if (readonly) {
    dout(10) << "cannot push inode: PurgeQueue is readonly" << dendl;
    completion->complete(-CEPHFS_EROFS);
    return;
  }

  // Callers should have waited for open() before using us
  ceph_assert(!journaler.is_readonly());

  bufferlist bl;

  encode(pi, bl);
  journaler.append_entry(bl);
  journaler.wait_for_flush(completion);

  // Maybe go ahead and do something with it right away
  bool could_consume = _consume();
  if (!could_consume) {
    // Usually it is not necessary to explicitly flush here, because the reader
    // will get flushes generated inside Journaler::is_readable. However,
    // if we remain in a _can_consume()==false state for a long period then
    // we should flush in order to allow MDCache to drop its strays rather
    // than having them wait for the PurgeQueue to progress.
    if (!delayed_flush) {
      delayed_flush = new LambdaContext([this](int r){
        delayed_flush = nullptr;
        journaler.flush();
      });

      timer.add_event_after(
          g_conf()->mds_purge_queue_busy_flush_period,
          delayed_flush);
    }
  }
}

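// Minimal caller sketch (hypothetical; names are illustrative only):
//   PurgeItem pi;
//   pi.action = PurgeItem::PURGE_FILE;
//   pi.ino = ino;          // inode to purge
//   pi.size = size;
//   pi.layout = layout;
//   pq.push(pi, new LambdaContext([](int r) {
//     // called via the Finisher once the journal entry is flushed
//   }));
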
uint32_t PurgeQueue::_calculate_ops(const PurgeItem &item) const
{
  uint32_t ops_required = 0;
  if (item.action == PurgeItem::PURGE_DIR) {
    // Directory: count dirfrags to be deleted
    frag_vec_t leaves;
    if (!item.fragtree.is_leaf(frag_t())) {
      item.fragtree.get_leaves(leaves);
    }
    // One for the root, plus any leaves
    ops_required = 1 + leaves.size();
  } else {
    // File: work out concurrent Filer::purge deletes
    // Account for removing (or zeroing) the backtrace
    const uint64_t num = (item.size > 0) ?
      Striper::get_num_objects(item.layout, item.size) : 1;

    ops_required = num;

    // Account for deletions for old pools
    if (item.action != PurgeItem::TRUNCATE_FILE) {
      ops_required += item.old_pools.size();
    }
  }

  return ops_required;
}

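// Worked example (assuming a default 4 MiB object size): a 100 MiB
// PURGE_FILE item spans 25 objects, plus one delete per old pool; a
// zero-length file still costs one op for its backtrace object; a
// PURGE_DIR item with two leaf dirfrags costs 1 (root) + 2 = 3 ops.
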
bool PurgeQueue::_can_consume()
{
  if (readonly) {
    dout(10) << "can't consume: PurgeQueue is readonly" << dendl;
    return false;
  }

  dout(20) << ops_in_flight << "/" << max_purge_ops << " ops, "
           << in_flight.size() << "/" << g_conf()->mds_max_purge_files
           << " files" << dendl;

  if (in_flight.size() == 0 && cct->_conf->mds_max_purge_files > 0) {
    // Always permit consumption if nothing is in flight, so that the ops
    // limit can never be so low as to forbid all progress (unless the
    // administrator has deliberately paused purging by setting max
    // purge files to zero).
    return true;
  }

  if (ops_in_flight >= max_purge_ops) {
    dout(20) << "Throttling on op limit " << ops_in_flight << "/"
             << max_purge_ops << dendl;
    return false;
  }

  if (in_flight.size() >= cct->_conf->mds_max_purge_files) {
    dout(20) << "Throttling on item limit " << in_flight.size()
             << "/" << cct->_conf->mds_max_purge_files << dendl;
    return false;
  } else {
    return true;
  }
}

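// For illustration: with max_purge_ops=64 and mds_max_purge_files=32,
// consumption stops once either 64 ops or 32 files are in flight, but a
// single item is always admitted when nothing is in flight, so an
// undersized op budget can never stall the queue entirely.
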
void PurgeQueue::_go_readonly(int r)
{
  if (readonly) return;
  dout(1) << "going readonly because internal IO failed: " << strerror(-r) << dendl;
  readonly = true;
  finisher.queue(on_error, r);
  on_error = nullptr;
  journaler.set_readonly();
  finish_contexts(g_ceph_context, waiting_for_recovery, r);
}

bool PurgeQueue::_consume()
{
  ceph_assert(ceph_mutex_is_locked_by_me(lock));

  bool could_consume = false;
  while(_can_consume()) {

    if (delayed_flush) {
      // We are now going to read from the journal, so any proactive
      // flush is no longer necessary. This is not functionally necessary
      // but it can avoid generating extra fragmented flush IOs.
      timer.cancel_event(delayed_flush);
      delayed_flush = nullptr;
    }

    if (int r = journaler.get_error()) {
      derr << "Error " << r << " recovering write_pos" << dendl;
      _go_readonly(r);
      return could_consume;
    }

    if (!journaler.is_readable()) {
      dout(10) << " not readable right now" << dendl;
      // Because we are the writer and the reader of the journal
      // via the same Journaler instance, we never need to reread_head
      if (!journaler.have_waiter()) {
        journaler.wait_for_readable(new LambdaContext([this](int r) {
          std::lock_guard l(lock);
          if (r == 0) {
            _consume();
          } else if (r != -CEPHFS_EAGAIN) {
            _go_readonly(r);
          }
        }));
      }

      return could_consume;
    }

    could_consume = true;
    // The journaler is readable: consume an entry
    bufferlist bl;
    bool readable = journaler.try_read_entry(bl);
    ceph_assert(readable);  // we checked earlier

    dout(20) << " decoding entry" << dendl;
    PurgeItem item;
    auto q = bl.cbegin();
    try {
      decode(item, q);
    } catch (const buffer::error &err) {
      derr << "Decode error at read_pos=0x" << std::hex
           << journaler.get_read_pos() << dendl;
      _go_readonly(CEPHFS_EIO);
    }
    dout(20) << " executing item (" << item.ino << ")" << dendl;
    _execute_item(item, journaler.get_read_pos());
  }

  dout(10) << " cannot consume right now" << dendl;

  return could_consume;
}

class C_IO_PurgeItem_Commit : public Context {
public:
  C_IO_PurgeItem_Commit(PurgeQueue *pq, std::vector<PurgeItemCommitOp> ops,
                        uint64_t expire_to)
    : purge_queue(pq), ops_vec(std::move(ops)), expire_to(expire_to) {
  }

  void finish(int r) override {
    purge_queue->_commit_ops(r, ops_vec, expire_to);
  }

private:
  PurgeQueue *purge_queue;
  std::vector<PurgeItemCommitOp> ops_vec;
  uint64_t expire_to;
};

void PurgeQueue::_commit_ops(int r, const std::vector<PurgeItemCommitOp>& ops_vec,
                             uint64_t expire_to)
{
  if (r < 0) {
    derr << " r = " << r << dendl;
    return;
  }

  SnapContext nullsnapc;
  C_GatherBuilder gather(cct);

  for (auto &op : ops_vec) {
    dout(10) << op.item.get_type_str() << dendl;
    if (op.type == PurgeItemCommitOp::PURGE_OP_RANGE) {
      uint64_t first_obj = 0, num_obj = 0;
      uint64_t num = Striper::get_num_objects(op.item.layout, op.item.size);
      num_obj = num;

      if (op.item.action == PurgeItem::TRUNCATE_FILE) {
        // skip the first object (it holds the backtrace)
        first_obj = 1;
        if (num > 1)
          num_obj = num - 1;
        else
          continue;
      }

      filer.purge_range(op.item.ino, &op.item.layout, op.item.snapc,
                        first_obj, num_obj, ceph::real_clock::now(), op.flags,
                        gather.new_sub());
    } else if (op.type == PurgeItemCommitOp::PURGE_OP_REMOVE) {
      if (op.item.action == PurgeItem::PURGE_DIR) {
        objecter->remove(op.oid, op.oloc, nullsnapc,
                         ceph::real_clock::now(), op.flags,
                         gather.new_sub());
      } else {
        objecter->remove(op.oid, op.oloc, op.item.snapc,
                         ceph::real_clock::now(), op.flags,
                         gather.new_sub());
      }
    } else if (op.type == PurgeItemCommitOp::PURGE_OP_ZERO) {
      filer.zero(op.item.ino, &op.item.layout, op.item.snapc,
                 0, op.item.layout.object_size, ceph::real_clock::now(), 0, true,
                 gather.new_sub());
    } else {
      derr << "Invalid purge op: " << op.type << dendl;
      ceph_abort();
    }
  }

  ceph_assert(gather.has_subs());

  gather.set_finisher(new C_OnFinisher(
    new LambdaContext([this, expire_to](int r) {
      std::lock_guard l(lock);

      if (r == -CEPHFS_EBLOCKLISTED) {
        finisher.queue(on_error, r);
        on_error = nullptr;
        return;
      }

      _execute_item_complete(expire_to);
      _consume();

      // Have we gone idle? If so, do an extra write_head now instead of
      // waiting for the next flush after journaler_write_head_interval.
      // Also do this periodically even if not idle, so that the persisted
      // expire_pos doesn't fall too far behind our progress when consuming
      // a very long queue.
      if (!readonly &&
          (in_flight.empty() || journaler.write_head_needed())) {
        journaler.write_head(nullptr);
      }
    }), &finisher));

  gather.activate();
}

void PurgeQueue::_execute_item(
    const PurgeItem &item,
    uint64_t expire_to)
{
  ceph_assert(ceph_mutex_is_locked_by_me(lock));

  in_flight[expire_to] = item;
  logger->set(l_pq_executing, in_flight.size());
  files_high_water = std::max<uint64_t>(files_high_water,
                                        in_flight.size());
  logger->set(l_pq_executing_high_water, files_high_water);
  auto ops = _calculate_ops(item);
  ops_in_flight += ops;
  logger->set(l_pq_executing_ops, ops_in_flight);
  ops_high_water = std::max(ops_high_water, ops_in_flight);
  logger->set(l_pq_executing_ops_high_water, ops_high_water);

  std::vector<PurgeItemCommitOp> ops_vec;
  auto submit_ops = [&]() {
    finisher.queue(new C_IO_PurgeItem_Commit(this, std::move(ops_vec), expire_to));
  };

  if (item.action == PurgeItem::PURGE_FILE) {
    if (item.size > 0) {
      uint64_t num = Striper::get_num_objects(item.layout, item.size);
      dout(10) << " 0~" << item.size << " objects 0~" << num
               << " snapc " << item.snapc << " on " << item.ino << dendl;
      ops_vec.emplace_back(item, PurgeItemCommitOp::PURGE_OP_RANGE, 0);
    }

    // remove the backtrace object if it was not purged
    object_t oid = CInode::get_object_name(item.ino, frag_t(), "");
    if (ops_vec.empty() || !item.layout.pool_ns.empty()) {
      object_locator_t oloc(item.layout.pool_id);
      dout(10) << " remove backtrace object " << oid
               << " pool " << oloc.pool << " snapc " << item.snapc << dendl;
      ops_vec.emplace_back(item, PurgeItemCommitOp::PURGE_OP_REMOVE, 0, oid, oloc);
    }

    // remove old backtrace objects
    for (const auto &p : item.old_pools) {
      object_locator_t oloc(p);
      dout(10) << " remove backtrace object " << oid
               << " old pool " << p << " snapc " << item.snapc << dendl;
      ops_vec.emplace_back(item, PurgeItemCommitOp::PURGE_OP_REMOVE, 0, oid, oloc);
    }
  } else if (item.action == PurgeItem::PURGE_DIR) {
    object_locator_t oloc(metadata_pool);
    frag_vec_t leaves;
    if (!item.fragtree.is_leaf(frag_t()))
      item.fragtree.get_leaves(leaves);
    leaves.push_back(frag_t());
    for (const auto &leaf : leaves) {
      object_t oid = CInode::get_object_name(item.ino, leaf, "");
      dout(10) << " remove dirfrag " << oid << dendl;
      ops_vec.emplace_back(item, PurgeItemCommitOp::PURGE_OP_REMOVE, 0, oid, oloc);
    }
  } else if (item.action == PurgeItem::TRUNCATE_FILE) {
    const uint64_t num = Striper::get_num_objects(item.layout, item.size);
    dout(10) << " 0~" << item.size << " objects 0~" << num
             << " snapc " << item.snapc << " on " << item.ino << dendl;

    // keep backtrace object
    if (num > 1) {
      ops_vec.emplace_back(item, PurgeItemCommitOp::PURGE_OP_RANGE, 0);
    }
    ops_vec.emplace_back(item, PurgeItemCommitOp::PURGE_OP_ZERO, 0);
  } else {
    derr << "Invalid item (action=" << item.action << ") in purge queue, "
            "dropping it" << dendl;
    ops_in_flight -= ops;
    logger->set(l_pq_executing_ops, ops_in_flight);
    ops_high_water = std::max(ops_high_water, ops_in_flight);
    logger->set(l_pq_executing_ops_high_water, ops_high_water);
    in_flight.erase(expire_to);
    logger->set(l_pq_executing, in_flight.size());
    files_high_water = std::max<uint64_t>(files_high_water,
                                          in_flight.size());
    logger->set(l_pq_executing_high_water, files_high_water);
    return;
  }

  submit_ops();
}

void PurgeQueue::_execute_item_complete(
    uint64_t expire_to)
{
  ceph_assert(ceph_mutex_is_locked_by_me(lock));
  dout(10) << "complete at 0x" << std::hex << expire_to << std::dec << dendl;
  ceph_assert(in_flight.count(expire_to) == 1);

  auto iter = in_flight.find(expire_to);
  ceph_assert(iter != in_flight.end());
  if (iter == in_flight.begin()) {
    uint64_t pos = expire_to;
    if (!pending_expire.empty()) {
      auto n = iter;
      ++n;
      if (n == in_flight.end()) {
        pos = *pending_expire.rbegin();
        pending_expire.clear();
      } else {
        auto p = pending_expire.begin();
        do {
          if (*p >= n->first)
            break;
          pos = *p;
          pending_expire.erase(p++);
        } while (p != pending_expire.end());
      }
    }
    dout(10) << "expiring to 0x" << std::hex << pos << std::dec << dendl;
    journaler.set_expire_pos(pos);
  } else {
    // This is completely fine, we're not supposed to purge files in
    // order when doing them in parallel.
    dout(10) << "non-sequential completion, not expiring anything" << dendl;
    pending_expire.insert(expire_to);
  }

  ops_in_flight -= _calculate_ops(iter->second);
  logger->set(l_pq_executing_ops, ops_in_flight);
  ops_high_water = std::max(ops_high_water, ops_in_flight);
  logger->set(l_pq_executing_ops_high_water, ops_high_water);

  dout(10) << "completed item for ino " << iter->second.ino << dendl;

  in_flight.erase(iter);
  logger->set(l_pq_executing, in_flight.size());
  files_high_water = std::max<uint64_t>(files_high_water,
                                        in_flight.size());
  logger->set(l_pq_executing_high_water, files_high_water);
  dout(10) << "in_flight.size() now " << in_flight.size() << dendl;

  uint64_t write_pos = journaler.get_write_pos();
  uint64_t read_pos = journaler.get_read_pos();
  uint64_t expire_pos = journaler.get_expire_pos();
  uint64_t item_num = (write_pos - (in_flight.size() ? expire_pos : read_pos))
                      / purge_item_journal_size;
  dout(10) << "left purge items in journal: " << item_num
    << " (purge_item_journal_size/write_pos/read_pos/expire_pos) now at "
    << "(" << purge_item_journal_size << "/" << write_pos << "/" << read_pos
    << "/" << expire_pos << ")" << dendl;

  logger->set(l_pq_item_in_journal, item_num);
  logger->inc(l_pq_executed);
}

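// Illustrative arithmetic for item_num: with write_pos=0x19000,
// expire_pos=0x10000, items still in flight and
// purge_item_journal_size=0x120, (0x19000 - 0x10000) / 0x120 = 128 items
// remain in the journal (values chosen for the example only).
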
void PurgeQueue::update_op_limit(const MDSMap &mds_map)
{
  std::lock_guard l(lock);

  if (readonly) {
    dout(10) << "skipping; PurgeQueue is readonly" << dendl;
    return;
  }

  uint64_t pg_count = 0;
  objecter->with_osdmap([&](const OSDMap& o) {
    // Number of PGs across all data pools
    const std::vector<int64_t> &data_pools = mds_map.get_data_pools();
    for (const auto dp : data_pools) {
      if (o.get_pg_pool(dp) == NULL) {
        // It is possible that we have an older OSDMap than MDSMap,
        // because we don't start watching every OSDMap until after
        // MDSRank is initialized
        dout(4) << " data pool " << dp << " not found in OSDMap" << dendl;
        continue;
      }
      pg_count += o.get_pg_num(dp);
    }
  });

  // Work out a limit based on n_pgs / n_mdss, multiplied by the user's
  // preference for how many ops per PG
  max_purge_ops = uint64_t(((double)pg_count / (double)mds_map.get_max_mds()) *
                           cct->_conf->mds_max_purge_ops_per_pg);

  // User may also specify a hard limit; apply it if so.
  if (cct->_conf->mds_max_purge_ops) {
    max_purge_ops = std::min(max_purge_ops, cct->_conf->mds_max_purge_ops);
  }
}

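// Worked example (illustrative values): 512 PGs across the data pools,
// max_mds=2 and mds_max_purge_ops_per_pg=0.5 gives
// max_purge_ops = (512 / 2) * 0.5 = 128, which is then clamped by
// mds_max_purge_ops when that limit is set.
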
void PurgeQueue::handle_conf_change(const std::set<std::string>& changed,
                                    const MDSMap& mds_map)
{
  if (changed.count("mds_max_purge_ops")
      || changed.count("mds_max_purge_ops_per_pg")) {
    update_op_limit(mds_map);
  } else if (changed.count("mds_max_purge_files")) {
    std::lock_guard l(lock);
    if (in_flight.empty()) {
      // We might have gone from zero to a finite limit, so
      // might need to kick off consume.
      dout(4) << "maybe start work again (max_purge_files="
              << g_conf()->mds_max_purge_files << dendl;
      finisher.queue(new LambdaContext([this](int r){
        std::lock_guard l(lock);
        _consume();
      }));
    }
  }
}

bool PurgeQueue::drain(
  uint64_t *progress,
  uint64_t *progress_total,
  size_t *in_flight_count
  )
{
  std::lock_guard l(lock);

  if (readonly) {
    dout(10) << "skipping drain; PurgeQueue is readonly" << dendl;
    return false;
  }

  ceph_assert(progress != nullptr);
  ceph_assert(progress_total != nullptr);
  ceph_assert(in_flight_count != nullptr);

  const bool done = in_flight.empty() && (
      journaler.get_read_pos() == journaler.get_write_pos());
  if (done) {
    return true;
  }

  const uint64_t bytes_remaining = journaler.get_write_pos()
                                   - journaler.get_read_pos();

  if (!draining) {
    // Start of draining: remember how much there was outstanding at
    // this point so that we can give a progress percentage later
    draining = true;

    // Lift the op throttle as this daemon now has nothing to do but
    // drain the purge queue, so do it as fast as we can.
    max_purge_ops = 0xffff;
  }

  drain_initial = std::max(bytes_remaining, drain_initial);

  *progress = drain_initial - bytes_remaining;
  *progress_total = drain_initial;
  *in_flight_count = in_flight.size();

  return false;
}

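// Progress example (illustrative): if draining began with drain_initial =
// 1048576 bytes outstanding and bytes_remaining is now 262144, callers see
// *progress = 786432 of *progress_total = 1048576, i.e. 75% drained.
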
std::string_view PurgeItem::get_type_str() const
{
  switch(action) {
  case PurgeItem::NONE: return "NONE";
  case PurgeItem::PURGE_FILE: return "PURGE_FILE";
  case PurgeItem::PURGE_DIR: return "PURGE_DIR";
  case PurgeItem::TRUNCATE_FILE: return "TRUNCATE_FILE";
  default:
    return "UNKNOWN";
  }
}