1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2015 Red Hat
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
15 #ifndef PURGE_QUEUE_H_
16 #define PURGE_QUEUE_H_
#include <utility>

#include "include/compact_set.h"
#include "common/Finisher.h"
#include "mds/MDSMap.h"
#include "osdc/Journaler.h"
25 * Descriptor of the work associated with purging a file. We record
26 * the minimal amount of information from the inode such as the size
27 * and layout: all other un-needed inode metadata (times, permissions, etc)
33 enum Action
: uint8_t {
42 void encode(bufferlist
&bl
) const;
43 void decode(bufferlist::const_iterator
&p
);
45 static Action
str_to_type(std::string_view str
) {
46 return PurgeItem::actions
.at(std::string(str
));
49 void dump(Formatter
*f
) const
51 f
->dump_int("action", action
);
52 f
->dump_int("ino", ino
);
53 f
->dump_int("size", size
);
54 f
->open_object_section("layout");
57 f
->open_object_section("SnapContext");
60 f
->open_object_section("fragtree");
65 std::string_view
get_type_str() const;
// A "None" PurgeItem serves as a no-op for splicing out journal entries,
// so it needs a "pad_size" to specify the size of the journal space
// to be spliced out.
71 uint32_t pad_size
= 0;
76 std::vector
<int64_t> old_pools
;
80 static const std::map
<std::string
, PurgeItem::Action
> actions
;
82 WRITE_CLASS_ENCODER(PurgeItem
)
87 // How many items have been finished by PurgeQueue
89 l_pq_executing_ops_high_water
,
91 l_pq_executing_high_water
,
97 struct PurgeItemCommitOp
{
99 enum PurgeType
: uint8_t {
105 PurgeItemCommitOp(PurgeItem _item
, PurgeType _type
, int _flags
)
106 : item(_item
), type(_type
), flags(_flags
) {}
108 PurgeItemCommitOp(PurgeItem _item
, PurgeType _type
, int _flags
,
109 object_t _oid
, object_locator_t _oloc
)
110 : item(_item
), type(_type
), flags(_flags
), oid(_oid
), oloc(_oloc
) {}
116 object_locator_t oloc
;
 * A persistent queue of PurgeItems.  This class both writes to and
 * reads from the queue.  There is one of these per MDS rank.
123 * Note that this class does not take a reference to MDSRank: we are
124 * independent of all the metadata structures and do not need to
125 * take mds_lock for anything.
133 const int64_t metadata_pool_
,
// Create this queue's logger.  Presumably this populates the `logger`
// PerfCounters member -- confirm against the definition.
void create_logger();
144 // Write an empty queue, use this during MDS rank creation
145 void create(Context
*completion
);
147 // Read the Journaler header for an existing queue and start consuming
148 void open(Context
*completion
);
150 void wait_for_recovery(Context
*c
);
152 // Submit one entry to the work queue. Call back when it is persisted
153 // to the queue (there is no callback for when it is executed)
154 void push(const PurgeItem
&pi
, Context
*completion
);
156 void _commit_ops(int r
, const std::vector
<PurgeItemCommitOp
>& ops_vec
, uint64_t expire_to
);
158 // If the on-disk queue is empty and we are not currently processing
160 bool is_idle() const;
163 * Signal to the PurgeQueue that you would like it to hurry up and
164 * finish consuming everything in the queue. Provides progress
167 * @param progress: bytes consumed since we started draining
168 * @param progress_total: max bytes that were outstanding during purge
169 * @param in_flight_count: number of file purges currently in flight
171 * @returns true if drain is complete
175 uint64_t *progress_total
,
176 size_t *in_flight_count
);
178 void update_op_limit(const MDSMap
&mds_map
);
180 void handle_conf_change(const std::set
<std::string
>& changed
, const MDSMap
& mds_map
);
183 uint32_t _calculate_ops(const PurgeItem
&item
) const;
187 // recover the journal write_pos (drop any partial written entry)
191 * @return true if we were in a position to try and consume something:
192 * does not mean we necessarily did.
196 void _execute_item(const PurgeItem
&item
, uint64_t expire_to
);
// Internal: completion step for an item started by _execute_item();
// takes the same `expire_to` value.
void _execute_item_complete(uint64_t expire_to);
// Internal: switch the queue to read-only.  Presumably sets `readonly`,
// with `r` being the error that triggered the switch -- confirm against
// the definition.
void _go_readonly(int r);
202 const mds_rank_t rank
;
203 ceph::mutex lock
= ceph::make_mutex("PurgeQueue");
204 bool readonly
= false;
206 int64_t metadata_pool
;
208 // Don't use the MDSDaemon's Finisher and Timer, because this class
209 // operates outside of MDSDaemon::mds_lock
214 std::unique_ptr
<PerfCounters
> logger
;
220 // Map of Journaler offset to PurgeItem
221 std::map
<uint64_t, PurgeItem
> in_flight
;
223 std::set
<uint64_t> pending_expire
;
225 // Throttled allowances
226 uint64_t ops_in_flight
= 0;
228 // Dynamic op limit per MDS based on PG count
229 uint64_t max_purge_ops
= 0;
231 // How many bytes were remaining when drain() was first called,
232 // used for indicating progress.
233 uint64_t drain_initial
= 0;
235 // Has drain() ever been called on this instance?
236 bool draining
= false;
238 // Do we currently have a flush timer event waiting?
239 Context
*delayed_flush
= nullptr;
241 bool recovered
= false;
242 std::vector
<Context
*> waiting_for_recovery
;
244 size_t purge_item_journal_size
;
246 uint64_t ops_high_water
= 0;
247 uint64_t files_high_water
= 0;