]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2015 Red Hat | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | #ifndef PURGE_QUEUE_H_ | |
16 | #define PURGE_QUEUE_H_ | |
17 | ||
18 | #include "include/compact_set.h" | |
19 | #include "mds/MDSMap.h" | |
20 | #include "osdc/Journaler.h" | |
21 | ||
22 | ||
23 | /** | |
24 | * Descriptor of one unit of purge work (see Action: PURGE_FILE, TRUNCATE_FILE, PURGE_DIR). We record | |
25 | * only the minimal amount of information from the inode, such as the size | |
26 | * and layout: all other unneeded inode metadata (times, permissions, etc.) | |
27 | * has been discarded. str_to_type() looks names up with std::map::at, so a name missing from "actions" throws std::out_of_range; dump() does not emit stamp, pad_size or old_pools. | |
28 | */ | |
29 | class PurgeItem | |
30 | { | |
31 | public: | |
32 | enum Action : uint8_t { | |
33 | NONE = 0, | |
34 | PURGE_FILE = 1, | |
35 | TRUNCATE_FILE, | |
36 | PURGE_DIR | |
37 | }; | |
38 | ||
11fdf7f2 TL |
39 | utime_t stamp; |
40 | // A PurgeItem whose action is NONE serves as a no-op for splicing out | |
41 | // journal entries, so "pad_size" is needed to specify the size of the | |
42 | // journal space to be spliced. | |
43 | uint32_t pad_size; | |
7c673cae FG |
44 | Action action; |
45 | inodeno_t ino; | |
46 | uint64_t size; | |
47 | file_layout_t layout; | |
48 | compact_set<int64_t> old_pools; | |
49 | SnapContext snapc; | |
50 | fragtree_t fragtree; | |
51 | ||
52 | PurgeItem() | |
11fdf7f2 | 53 | : pad_size(0), action(NONE), ino(0), size(0) |
7c673cae FG |
54 | {} |
55 | ||
56 | void encode(bufferlist &bl) const; | |
11fdf7f2 TL |
57 | void decode(bufferlist::const_iterator &p); |
58 | ||
59 | static Action str_to_type(std::string_view str) { | |
60 | return PurgeItem::actions.at(std::string(str)); | |
61 | } | |
62 | ||
63 | void dump(Formatter *f) const | |
64 | { | |
65 | f->dump_int("action", action); | |
66 | f->dump_int("ino", ino); | |
67 | f->dump_int("size", size); | |
68 | f->open_object_section("layout"); | |
69 | layout.dump(f); | |
70 | f->close_section(); | |
71 | f->open_object_section("SnapContext"); | |
72 | snapc.dump(f); | |
73 | f->close_section(); | |
74 | f->open_object_section("fragtree"); | |
75 | fragtree.dump(f); | |
76 | f->close_section(); | |
77 | } | |
78 | ||
79 | std::string_view get_type_str() const; | |
80 | private: | |
81 | static const std::map<std::string, PurgeItem::Action> actions; | |
7c673cae FG |
82 | }; |
83 | WRITE_CLASS_ENCODER(PurgeItem) | |
84 | ||
85 | enum { | |
86 | l_pq_first = 3500, | |
87 | ||
88 | // How many items have been finished by PurgeQueue. NOTE(review): this presumably describes l_pq_executed rather than l_pq_executing_ops directly below -- confirm. | |
89 | l_pq_executing_ops, | |
90 | l_pq_executing, | |
91 | l_pq_executed, | |
92 | l_pq_last | |
93 | }; | |
94 | ||
95 | /** | |
96 | * A persistent queue of PurgeItems. This class both writes to and reads | |
97 | * from the queue. There is one of these per MDS rank. | |
98 | * | |
99 | * Note that this class does not take a reference to MDSRank: we are | |
100 | * independent of all the metadata structures and do not need to | |
101 | * take mds_lock for anything. | |
102 | */ | |
103 | class PurgeQueue | |
104 | { | |
105 | protected: | |
106 | CephContext *cct; | |
107 | const mds_rank_t rank; | |
108 | Mutex lock; | |
f64942e4 | 109 | bool readonly = false; |
7c673cae FG |
110 | |
111 | int64_t metadata_pool; | |
112 | ||
113 | // Don't use the MDSDaemon's Finisher and Timer, because this class | |
114 | // operates outside of MDSDaemon::mds_lock. | |
115 | Finisher finisher; | |
116 | SafeTimer timer; | |
117 | Filer filer; | |
118 | Objecter *objecter; | |
119 | std::unique_ptr<PerfCounters> logger; | |
120 | ||
121 | Journaler journaler; | |
122 | ||
123 | Context *on_error; | |
124 | ||
125 | // Map of Journaler offset to PurgeItem | |
126 | std::map<uint64_t, PurgeItem> in_flight; | |
127 | ||
11fdf7f2 TL |
128 | std::set<uint64_t> pending_expire; |
129 | ||
7c673cae FG |
130 | // Throttled allowances |
131 | uint64_t ops_in_flight; | |
132 | ||
133 | // Dynamic op limit per MDS based on PG count | |
134 | uint64_t max_purge_ops; | |
135 | ||
136 | uint32_t _calculate_ops(const PurgeItem &item) const; | |
137 | ||
f64942e4 | 138 | bool _can_consume(); |
7c673cae FG |
139 | |
140 | // How many bytes were remaining when drain() was first called; | |
141 | // used for indicating progress. | |
142 | uint64_t drain_initial; | |
143 | ||
144 | // Has drain() ever been called on this instance? | |
145 | bool draining; | |
146 | ||
147 | // Recover the journal write_pos (drop any partially written entry) | |
3efd9988 | 148 | void _recover(); |
7c673cae FG |
149 | |
150 | /** | |
151 | * @return true if we were in a position to try to consume something; | |
152 | * this does not mean we necessarily did. | |
153 | */ | |
154 | bool _consume(); | |
155 | ||
156 | // Do we currently have a flush timer event waiting? | |
157 | Context *delayed_flush; | |
158 | ||
159 | void _execute_item( | |
160 | const PurgeItem &item, | |
161 | uint64_t expire_to); | |
162 | void _execute_item_complete( | |
163 | uint64_t expire_to); | |
164 | ||
3efd9988 FG |
165 | bool recovered; |
166 | std::list<Context*> waiting_for_recovery; | |
7c673cae | 167 | |
f64942e4 AA |
168 | void _go_readonly(int r); |
169 | ||
7c673cae FG |
170 | public: |
171 | void init(); | |
c07f9fc5 | 172 | void activate(); |
7c673cae FG |
173 | void shutdown(); |
174 | ||
175 | void create_logger(); | |
176 | ||
177 | // Write an empty queue; use this during MDS rank creation | |
178 | void create(Context *completion); | |
179 | ||
180 | // Read the Journaler header for an existing queue and start consuming | |
181 | void open(Context *completion); | |
182 | ||
3efd9988 FG |
183 | void wait_for_recovery(Context *c); |
184 | ||
7c673cae FG |
185 | // Submit one entry to the work queue. "completion" is called back when the entry is persisted |
186 | // to the queue (there is no callback for when it is executed). | |
187 | void push(const PurgeItem &pi, Context *completion); | |
188 | ||
189 | // Whether the on-disk queue is empty and we are not currently processing | |
190 | // anything. | |
191 | bool is_idle() const; | |
192 | ||
193 | /** | |
194 | * Signal to the PurgeQueue that you would like it to hurry up and | |
195 | * finish consuming everything in the queue. Provides progress | |
196 | * feedback. | |
197 | * | |
198 | * @param progress: bytes consumed since we started draining | |
199 | * @param progress_total: max bytes that were outstanding during purge | |
200 | * @param in_flight_count: number of file purges currently in flight | |
201 | * | |
202 | * @return true if drain is complete | |
203 | */ | |
204 | bool drain( | |
205 | uint64_t *progress, | |
206 | uint64_t *progress_total, | |
207 | size_t *in_flight_count); | |
208 | ||
209 | void update_op_limit(const MDSMap &mds_map); | |
210 | ||
11fdf7f2 | 211 | void handle_conf_change(const ConfigProxy& conf, |
7c673cae FG |
212 | const std::set <std::string> &changed, |
213 | const MDSMap &mds_map); | |
214 | ||
215 | PurgeQueue( | |
216 | CephContext *cct_, | |
217 | mds_rank_t rank_, | |
218 | const int64_t metadata_pool_, | |
219 | Objecter *objecter_, | |
220 | Context *on_error); | |
221 | ~PurgeQueue(); | |
222 | }; | |
223 | ||
224 | ||
225 | #endif | |
226 |