]> git.proxmox.com Git - ceph.git/blob - ceph/src/mds/PurgeQueue.h
update source to Ceph Pacific 16.2.2
[ceph.git] / ceph / src / mds / PurgeQueue.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2015 Red Hat
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15 #ifndef PURGE_QUEUE_H_
16 #define PURGE_QUEUE_H_
17
#include <utility>

#include "include/compact_set.h"
#include "common/Finisher.h"
#include "mds/MDSMap.h"
#include "osdc/Journaler.h"
22
23
24 /**
25 * Descriptor of the work associated with purging a file. We record
26 * the minimal amount of information from the inode such as the size
27 * and layout: all other un-needed inode metadata (times, permissions, etc)
28 * has been discarded.
29 */
30 class PurgeItem
31 {
32 public:
33 enum Action : uint8_t {
34 NONE = 0,
35 PURGE_FILE = 1,
36 TRUNCATE_FILE,
37 PURGE_DIR
38 };
39
40 PurgeItem() {}
41
42 void encode(bufferlist &bl) const;
43 void decode(bufferlist::const_iterator &p);
44
45 static Action str_to_type(std::string_view str) {
46 return PurgeItem::actions.at(std::string(str));
47 }
48
49 void dump(Formatter *f) const
50 {
51 f->dump_int("action", action);
52 f->dump_int("ino", ino);
53 f->dump_int("size", size);
54 f->open_object_section("layout");
55 layout.dump(f);
56 f->close_section();
57 f->open_object_section("SnapContext");
58 snapc.dump(f);
59 f->close_section();
60 f->open_object_section("fragtree");
61 fragtree.dump(f);
62 f->close_section();
63 }
64
65 std::string_view get_type_str() const;
66
67 utime_t stamp;
68 //None PurgeItem serves as NoOp for splicing out journal entries;
69 //so there has to be a "pad_size" to specify the size of journal
70 //space to be spliced.
71 uint32_t pad_size = 0;
72 Action action = NONE;
73 inodeno_t ino = 0;
74 uint64_t size = 0;
75 file_layout_t layout;
76 std::vector<int64_t> old_pools;
77 SnapContext snapc;
78 fragtree_t fragtree;
79 private:
80 static const std::map<std::string, PurgeItem::Action> actions;
81 };
82 WRITE_CLASS_ENCODER(PurgeItem)
83
// Identifiers for the PurgeQueue counters (presumably PerfCounters
// indices -- confirm against create_logger()).  l_pq_first and
// l_pq_last are the bracketing sentinels; the values in between are
// assigned consecutively starting at l_pq_first + 1.
enum {
  l_pq_first = 3500,

  l_pq_executing_ops,
  l_pq_executing_ops_high_water,
  l_pq_executing,
  l_pq_executing_high_water,
  // NOTE(review): presumably "how many items have been finished by
  // PurgeQueue" -- confirm against the counter registration.
  l_pq_executed,
  l_pq_item_in_journal,

  l_pq_last
};
96
97 struct PurgeItemCommitOp {
98 public:
99 enum PurgeType : uint8_t {
100 PURGE_OP_RANGE = 0,
101 PURGE_OP_REMOVE = 1,
102 PURGE_OP_ZERO
103 };
104
105 PurgeItemCommitOp(PurgeItem _item, PurgeType _type, int _flags)
106 : item(_item), type(_type), flags(_flags) {}
107
108 PurgeItemCommitOp(PurgeItem _item, PurgeType _type, int _flags,
109 object_t _oid, object_locator_t _oloc)
110 : item(_item), type(_type), flags(_flags), oid(_oid), oloc(_oloc) {}
111
112 PurgeItem item;
113 PurgeType type;
114 int flags;
115 object_t oid;
116 object_locator_t oloc;
117 };
118
119 /**
120 * A persistent queue of PurgeItems. This class both writes and reads
121 * to the queue. There is one of these per MDS rank.
122 *
123 * Note that this class does not take a reference to MDSRank: we are
124 * independent of all the metadata structures and do not need to
125 * take mds_lock for anything.
126 */
127 class PurgeQueue
128 {
129 public:
130 PurgeQueue(
131 CephContext *cct_,
132 mds_rank_t rank_,
133 const int64_t metadata_pool_,
134 Objecter *objecter_,
135 Context *on_error);
136 ~PurgeQueue();
137
138 void init();
139 void activate();
140 void shutdown();
141
142 void create_logger();
143
144 // Write an empty queue, use this during MDS rank creation
145 void create(Context *completion);
146
147 // Read the Journaler header for an existing queue and start consuming
148 void open(Context *completion);
149
150 void wait_for_recovery(Context *c);
151
152 // Submit one entry to the work queue. Call back when it is persisted
153 // to the queue (there is no callback for when it is executed)
154 void push(const PurgeItem &pi, Context *completion);
155
156 void _commit_ops(int r, const std::vector<PurgeItemCommitOp>& ops_vec, uint64_t expire_to);
157
158 // If the on-disk queue is empty and we are not currently processing
159 // anything.
160 bool is_idle() const;
161
162 /**
163 * Signal to the PurgeQueue that you would like it to hurry up and
164 * finish consuming everything in the queue. Provides progress
165 * feedback.
166 *
167 * @param progress: bytes consumed since we started draining
168 * @param progress_total: max bytes that were outstanding during purge
169 * @param in_flight_count: number of file purges currently in flight
170 *
171 * @returns true if drain is complete
172 */
173 bool drain(
174 uint64_t *progress,
175 uint64_t *progress_total,
176 size_t *in_flight_count);
177
178 void update_op_limit(const MDSMap &mds_map);
179
180 void handle_conf_change(const std::set<std::string>& changed, const MDSMap& mds_map);
181
182 private:
183 uint32_t _calculate_ops(const PurgeItem &item) const;
184
185 bool _can_consume();
186
187 // recover the journal write_pos (drop any partial written entry)
188 void _recover();
189
190 /**
191 * @return true if we were in a position to try and consume something:
192 * does not mean we necessarily did.
193 */
194 bool _consume();
195
196 void _execute_item(const PurgeItem &item, uint64_t expire_to);
197 void _execute_item_complete(uint64_t expire_to);
198
199 void _go_readonly(int r);
200
201 CephContext *cct;
202 const mds_rank_t rank;
203 ceph::mutex lock = ceph::make_mutex("PurgeQueue");
204 bool readonly = false;
205
206 int64_t metadata_pool;
207
208 // Don't use the MDSDaemon's Finisher and Timer, because this class
209 // operates outside of MDSDaemon::mds_lock
210 Finisher finisher;
211 SafeTimer timer;
212 Filer filer;
213 Objecter *objecter;
214 std::unique_ptr<PerfCounters> logger;
215
216 Journaler journaler;
217
218 Context *on_error;
219
220 // Map of Journaler offset to PurgeItem
221 std::map<uint64_t, PurgeItem> in_flight;
222
223 std::set<uint64_t> pending_expire;
224
225 // Throttled allowances
226 uint64_t ops_in_flight = 0;
227
228 // Dynamic op limit per MDS based on PG count
229 uint64_t max_purge_ops = 0;
230
231 // How many bytes were remaining when drain() was first called,
232 // used for indicating progress.
233 uint64_t drain_initial = 0;
234
235 // Has drain() ever been called on this instance?
236 bool draining = false;
237
238 // Do we currently have a flush timer event waiting?
239 Context *delayed_flush = nullptr;
240
241 bool recovered = false;
242 std::vector<Context*> waiting_for_recovery;
243
244 size_t purge_item_journal_size;
245
246 uint64_t ops_high_water = 0;
247 uint64_t files_high_water = 0;
248 };
249 #endif