]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2015 Red Hat | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | #ifndef PURGE_QUEUE_H_ | |
16 | #define PURGE_QUEUE_H_ | |
17 | ||
18 | #include "include/compact_set.h" | |
19 | #include "mds/MDSMap.h" | |
20 | #include "osdc/Journaler.h" | |
21 | ||
22 | ||
23 | /** | |
24 | * Descriptor of the work associated with purging a file. We record | |
25 | * the minimal amount of information from the inode such as the size | |
26 | * and layout: all other un-needed inode metadata (times, permissions, etc) | |
27 | * has been discarded. | |
28 | */ | |
29 | class PurgeItem | |
30 | { | |
31 | public: | |
32 | enum Action : uint8_t { | |
33 | NONE = 0, | |
34 | PURGE_FILE = 1, | |
35 | TRUNCATE_FILE, | |
36 | PURGE_DIR | |
37 | }; | |
38 | ||
9f95a23c | 39 | PurgeItem() {} |
7c673cae FG |
40 | |
41 | void encode(bufferlist &bl) const; | |
11fdf7f2 TL |
42 | void decode(bufferlist::const_iterator &p); |
43 | ||
44 | static Action str_to_type(std::string_view str) { | |
45 | return PurgeItem::actions.at(std::string(str)); | |
46 | } | |
47 | ||
48 | void dump(Formatter *f) const | |
49 | { | |
50 | f->dump_int("action", action); | |
51 | f->dump_int("ino", ino); | |
52 | f->dump_int("size", size); | |
53 | f->open_object_section("layout"); | |
54 | layout.dump(f); | |
55 | f->close_section(); | |
56 | f->open_object_section("SnapContext"); | |
57 | snapc.dump(f); | |
58 | f->close_section(); | |
59 | f->open_object_section("fragtree"); | |
60 | fragtree.dump(f); | |
61 | f->close_section(); | |
62 | } | |
63 | ||
64 | std::string_view get_type_str() const; | |
9f95a23c TL |
65 | |
66 | utime_t stamp; | |
67 | //None PurgeItem serves as NoOp for splicing out journal entries; | |
68 | //so there has to be a "pad_size" to specify the size of journal | |
69 | //space to be spliced. | |
70 | uint32_t pad_size = 0; | |
71 | Action action = NONE; | |
72 | inodeno_t ino = 0; | |
73 | uint64_t size = 0; | |
74 | file_layout_t layout; | |
75 | compact_set<int64_t> old_pools; | |
76 | SnapContext snapc; | |
77 | fragtree_t fragtree; | |
11fdf7f2 TL |
78 | private: |
79 | static const std::map<std::string, PurgeItem::Action> actions; | |
7c673cae FG |
80 | }; |
81 | WRITE_CLASS_ENCODER(PurgeItem) | |
82 | ||
// Perf counter indices for PurgeQueue.  l_pq_first and l_pq_last bracket
// the range; the enumerators in between take consecutive values starting
// at l_pq_first + 1, so their order must not change.
enum {
  l_pq_first = 3500,

  // How many items have been finished by PurgeQueue
  // NOTE(review): this wording reads as if it describes l_pq_executed
  // rather than l_pq_executing_ops -- confirm against the logger setup.
  l_pq_executing_ops,
  l_pq_executing_ops_high_water,
  l_pq_executing,
  l_pq_executing_high_water,
  l_pq_executed,
  l_pq_item_in_journal,
  l_pq_last
};
95 | ||
96 | /** | |
97 | * A persistent queue of PurgeItems. This class both writes and reads | |
98 | * to the queue. There is one of these per MDS rank. | |
99 | * | |
100 | * Note that this class does not take a reference to MDSRank: we are | |
101 | * independent of all the metadata structures and do not need to | |
102 | * take mds_lock for anything. | |
103 | */ | |
104 | class PurgeQueue | |
105 | { | |
7c673cae | 106 | public: |
9f95a23c TL |
107 | PurgeQueue( |
108 | CephContext *cct_, | |
109 | mds_rank_t rank_, | |
110 | const int64_t metadata_pool_, | |
111 | Objecter *objecter_, | |
112 | Context *on_error); | |
113 | ~PurgeQueue(); | |
114 | ||
7c673cae | 115 | void init(); |
c07f9fc5 | 116 | void activate(); |
7c673cae FG |
117 | void shutdown(); |
118 | ||
119 | void create_logger(); | |
120 | ||
121 | // Write an empty queue, use this during MDS rank creation | |
122 | void create(Context *completion); | |
123 | ||
124 | // Read the Journaler header for an existing queue and start consuming | |
125 | void open(Context *completion); | |
126 | ||
3efd9988 FG |
127 | void wait_for_recovery(Context *c); |
128 | ||
7c673cae FG |
129 | // Submit one entry to the work queue. Call back when it is persisted |
130 | // to the queue (there is no callback for when it is executed) | |
131 | void push(const PurgeItem &pi, Context *completion); | |
132 | ||
133 | // If the on-disk queue is empty and we are not currently processing | |
134 | // anything. | |
135 | bool is_idle() const; | |
136 | ||
137 | /** | |
138 | * Signal to the PurgeQueue that you would like it to hurry up and | |
139 | * finish consuming everything in the queue. Provides progress | |
140 | * feedback. | |
141 | * | |
142 | * @param progress: bytes consumed since we started draining | |
143 | * @param progress_total: max bytes that were outstanding during purge | |
144 | * @param in_flight_count: number of file purges currently in flight | |
145 | * | |
146 | * @returns true if drain is complete | |
147 | */ | |
148 | bool drain( | |
149 | uint64_t *progress, | |
150 | uint64_t *progress_total, | |
151 | size_t *in_flight_count); | |
152 | ||
153 | void update_op_limit(const MDSMap &mds_map); | |
154 | ||
92f5a8d4 | 155 | void handle_conf_change(const std::set<std::string>& changed, const MDSMap& mds_map); |
7c673cae | 156 | |
9f95a23c TL |
157 | private: |
158 | uint32_t _calculate_ops(const PurgeItem &item) const; | |
7c673cae | 159 | |
9f95a23c TL |
160 | bool _can_consume(); |
161 | ||
162 | // recover the journal write_pos (drop any partial written entry) | |
163 | void _recover(); | |
164 | ||
165 | /** | |
166 | * @return true if we were in a position to try and consume something: | |
167 | * does not mean we necessarily did. | |
168 | */ | |
169 | bool _consume(); | |
170 | ||
171 | void _execute_item(const PurgeItem &item, uint64_t expire_to); | |
172 | void _execute_item_complete(uint64_t expire_to); | |
173 | ||
174 | void _go_readonly(int r); | |
175 | ||
176 | CephContext *cct; | |
177 | const mds_rank_t rank; | |
178 | ceph::mutex lock = ceph::make_mutex("PurgeQueue"); | |
179 | bool readonly = false; | |
7c673cae | 180 | |
9f95a23c TL |
181 | int64_t metadata_pool; |
182 | ||
183 | // Don't use the MDSDaemon's Finisher and Timer, because this class | |
184 | // operates outside of MDSDaemon::mds_lock | |
185 | Finisher finisher; | |
186 | SafeTimer timer; | |
187 | Filer filer; | |
188 | Objecter *objecter; | |
189 | std::unique_ptr<PerfCounters> logger; | |
190 | ||
191 | Journaler journaler; | |
192 | ||
193 | Context *on_error; | |
194 | ||
195 | // Map of Journaler offset to PurgeItem | |
196 | std::map<uint64_t, PurgeItem> in_flight; | |
197 | ||
198 | std::set<uint64_t> pending_expire; | |
199 | ||
200 | // Throttled allowances | |
201 | uint64_t ops_in_flight = 0; | |
202 | ||
203 | // Dynamic op limit per MDS based on PG count | |
204 | uint64_t max_purge_ops = 0; | |
205 | ||
206 | // How many bytes were remaining when drain() was first called, | |
207 | // used for indicating progress. | |
208 | uint64_t drain_initial = 0; | |
209 | ||
210 | // Has drain() ever been called on this instance? | |
211 | bool draining = false; | |
212 | ||
213 | // Do we currently have a flush timer event waiting? | |
214 | Context *delayed_flush = nullptr; | |
215 | ||
216 | bool recovered = false; | |
217 | std::vector<Context*> waiting_for_recovery; | |
218 | ||
219 | size_t purge_item_journal_size; | |
220 | ||
221 | uint64_t ops_high_water = 0; | |
222 | uint64_t files_high_water = 0; | |
223 | }; | |
224 | #endif |