]> git.proxmox.com Git - ceph.git/blob - ceph/src/mds/MDSCacheObject.h
update sources to v12.2.1
[ceph.git] / ceph / src / mds / MDSCacheObject.h
1 #ifndef CEPH_MDSCACHEOBJECT_H
2 #define CEPH_MDSCACHEOBJECT_H
3
4 #include <ostream>
5
6 #include "common/config.h"
7
8 #include "include/Context.h"
9 #include "include/alloc_ptr.h"
10 #include "include/assert.h"
11 #include "include/mempool.h"
12 #include "include/types.h"
13 #include "include/xlist.h"
14
15 #include "mdstypes.h"
16
17 #define MDS_REF_SET // define me for improved debug output, sanity checking
18 //#define MDS_AUTHPIN_SET // define me for debugging auth pin leaks
19 //#define MDS_VERIFY_FRAGSTAT // do (slow) sanity checking on frags
20
21
// Forward declarations: these types are only named through pointers or
// references in the declarations of this header.
class MDSCacheObject;
class MDSInternalContextBase;
class MLock;
class SimpleLock;
26
27 /*
28 * for metadata leases to clients
29 */
30 struct ClientLease {
31 client_t client;
32 MDSCacheObject *parent;
33
34 ceph_seq_t seq;
35 utime_t ttl;
36 xlist<ClientLease*>::item item_session_lease; // per-session list
37 xlist<ClientLease*>::item item_lease; // global list
38
39 ClientLease(client_t c, MDSCacheObject *p) :
40 client(c), parent(p), seq(0),
41 item_session_lease(this),
42 item_lease(this) { }
43 };
44
45
46 // print hack
47 struct mdsco_db_line_prefix {
48 MDSCacheObject *object;
49 explicit mdsco_db_line_prefix(MDSCacheObject *o) : object(o) {}
50 };
51 std::ostream& operator<<(std::ostream& out, const mdsco_db_line_prefix& o);
52
53 // printer
54 std::ostream& operator<<(std::ostream& out, const MDSCacheObject &o);
55
56 class MDSCacheObject {
57 public:
58 // -- pins --
59 const static int PIN_REPLICATED = 1000;
60 const static int PIN_DIRTY = 1001;
61 const static int PIN_LOCK = -1002;
62 const static int PIN_REQUEST = -1003;
63 const static int PIN_WAITER = 1004;
64 const static int PIN_DIRTYSCATTERED = -1005;
65 static const int PIN_AUTHPIN = 1006;
66 static const int PIN_PTRWAITER = -1007;
67 const static int PIN_TEMPEXPORTING = 1008; // temp pin between encode_ and finish_export
68 static const int PIN_CLIENTLEASE = 1009;
69 static const int PIN_DISCOVERBASE = 1010;
70
71 const char *generic_pin_name(int p) const {
72 switch (p) {
73 case PIN_REPLICATED: return "replicated";
74 case PIN_DIRTY: return "dirty";
75 case PIN_LOCK: return "lock";
76 case PIN_REQUEST: return "request";
77 case PIN_WAITER: return "waiter";
78 case PIN_DIRTYSCATTERED: return "dirtyscattered";
79 case PIN_AUTHPIN: return "authpin";
80 case PIN_PTRWAITER: return "ptrwaiter";
81 case PIN_TEMPEXPORTING: return "tempexporting";
82 case PIN_CLIENTLEASE: return "clientlease";
83 case PIN_DISCOVERBASE: return "discoverbase";
84 default: ceph_abort(); return 0;
85 }
86 }
87
88 // -- state --
89 const static int STATE_AUTH = (1<<30);
90 const static int STATE_DIRTY = (1<<29);
91 const static int STATE_NOTIFYREF = (1<<28); // notify dropping ref drop through _put()
92 const static int STATE_REJOINING = (1<<27); // replica has not joined w/ primary copy
93 const static int STATE_REJOINUNDEF = (1<<26); // contents undefined.
94
95
96 // -- wait --
97 const static uint64_t WAIT_ORDERED = (1ull<<61);
98 const static uint64_t WAIT_SINGLEAUTH = (1ull<<60);
99 const static uint64_t WAIT_UNFREEZE = (1ull<<59); // pka AUTHPINNABLE
100
101
102 // ============================================
103 // cons
104 public:
105 MDSCacheObject() :
106 state(0),
107 ref(0),
108 auth_pins(0), nested_auth_pins(0),
109 replica_nonce(0)
110 {}
111 virtual ~MDSCacheObject() {}
112
113 // printing
114 virtual void print(std::ostream& out) = 0;
115 virtual std::ostream& print_db_line_prefix(std::ostream& out) {
116 return out << "mdscacheobject(" << this << ") ";
117 }
118
119 // --------------------------------------------
120 // state
121 protected:
122 __u32 state; // state bits
123
124 public:
125 unsigned get_state() const { return state; }
126 unsigned state_test(unsigned mask) const { return (state & mask); }
127 void state_clear(unsigned mask) { state &= ~mask; }
128 void state_set(unsigned mask) { state |= mask; }
129 void state_reset(unsigned s) { state = s; }
130
131 bool is_auth() const { return state_test(STATE_AUTH); }
132 bool is_dirty() const { return state_test(STATE_DIRTY); }
133 bool is_clean() const { return !is_dirty(); }
134 bool is_rejoining() const { return state_test(STATE_REJOINING); }
135
136 // --------------------------------------------
137 // authority
138 virtual mds_authority_t authority() const = 0;
139 bool is_ambiguous_auth() const {
140 return authority().second != CDIR_AUTH_UNKNOWN;
141 }
142
143 // --------------------------------------------
144 // pins
145 protected:
146 __s32 ref; // reference count
147 #ifdef MDS_REF_SET
148 mempool::mds_co::map<int,int> ref_map;
149 #endif
150
151 public:
152 int get_num_ref(int by = -1) const {
153 #ifdef MDS_REF_SET
154 if (by >= 0) {
155 if (ref_map.find(by) == ref_map.end()) {
156 return 0;
157 } else {
158 return ref_map.find(by)->second;
159 }
160 }
161 #endif
162 return ref;
163 }
164 virtual const char *pin_name(int by) const = 0;
165 //bool is_pinned_by(int by) { return ref_set.count(by); }
166 //multiset<int>& get_ref_set() { return ref_set; }
167
168 virtual void last_put() {}
169 virtual void bad_put(int by) {
170 #ifdef MDS_REF_SET
171 assert(ref_map[by] > 0);
172 #endif
173 assert(ref > 0);
174 }
175 virtual void _put() {}
176 void put(int by) {
177 #ifdef MDS_REF_SET
178 if (ref == 0 || ref_map[by] == 0) {
179 #else
180 if (ref == 0) {
181 #endif
182 bad_put(by);
183 } else {
184 ref--;
185 #ifdef MDS_REF_SET
186 ref_map[by]--;
187 #endif
188 if (ref == 0)
189 last_put();
190 if (state_test(STATE_NOTIFYREF))
191 _put();
192 }
193 }
194
195 virtual void first_get() {}
196 virtual void bad_get(int by) {
197 #ifdef MDS_REF_SET
198 assert(by < 0 || ref_map[by] == 0);
199 #endif
200 ceph_abort();
201 }
202 void get(int by) {
203 if (ref == 0)
204 first_get();
205 ref++;
206 #ifdef MDS_REF_SET
207 if (ref_map.find(by) == ref_map.end())
208 ref_map[by] = 0;
209 ref_map[by]++;
210 #endif
211 }
212
213 void print_pin_set(std::ostream& out) const {
214 #ifdef MDS_REF_SET
215 std::map<int, int>::const_iterator it = ref_map.begin();
216 while (it != ref_map.end()) {
217 out << " " << pin_name(it->first) << "=" << it->second;
218 ++it;
219 }
220 #else
221 out << " nref=" << ref;
222 #endif
223 }
224
225 protected:
226 int auth_pins;
227 int nested_auth_pins;
228 #ifdef MDS_AUTHPIN_SET
229 mempool::mds_co::multiset<void*> auth_pin_set;
230 #endif
231
232 public:
233 bool is_auth_pinned() const { return auth_pins || nested_auth_pins; }
234 int get_num_auth_pins() const { return auth_pins; }
235 int get_num_nested_auth_pins() const { return nested_auth_pins; }
236
237 void dump_states(Formatter *f) const;
238 void dump(Formatter *f) const;
239
240 // --------------------------------------------
241 // auth pins
242 virtual bool can_auth_pin() const = 0;
243 virtual void auth_pin(void *who) = 0;
244 virtual void auth_unpin(void *who) = 0;
245 virtual bool is_frozen() const = 0;
246 virtual bool is_freezing() const = 0;
247 virtual bool is_freezing_or_frozen() const {
248 return is_frozen() || is_freezing();
249 }
250
251
252 // --------------------------------------------
253 // replication (across mds cluster)
254 protected:
255 unsigned replica_nonce; // [replica] defined on replica
256 typedef compact_map<mds_rank_t,unsigned> replica_map_type;
257 replica_map_type replica_map; // [auth] mds -> nonce
258
259 public:
260 bool is_replicated() const { return !get_replicas().empty(); }
261 bool is_replica(mds_rank_t mds) const { return get_replicas().count(mds); }
262 int num_replicas() const { return get_replicas().size(); }
263 unsigned add_replica(mds_rank_t mds) {
264 if (get_replicas().count(mds))
265 return ++get_replicas()[mds]; // inc nonce
266 if (get_replicas().empty())
267 get(PIN_REPLICATED);
268 return get_replicas()[mds] = 1;
269 }
270 void add_replica(mds_rank_t mds, unsigned nonce) {
271 if (get_replicas().empty())
272 get(PIN_REPLICATED);
273 get_replicas()[mds] = nonce;
274 }
275 unsigned get_replica_nonce(mds_rank_t mds) {
276 assert(get_replicas().count(mds));
277 return get_replicas()[mds];
278 }
279 void remove_replica(mds_rank_t mds) {
280 assert(get_replicas().count(mds));
281 get_replicas().erase(mds);
282 if (get_replicas().empty()) {
283 put(PIN_REPLICATED);
284 }
285 }
286 void clear_replica_map() {
287 if (!get_replicas().empty())
288 put(PIN_REPLICATED);
289 replica_map.clear();
290 }
291 replica_map_type& get_replicas() { return replica_map; }
292 const replica_map_type& get_replicas() const { return replica_map; }
293 void list_replicas(std::set<mds_rank_t>& ls) const {
294 for (const auto &p : get_replicas()) {
295 ls.insert(p.first);
296 }
297 }
298
299 unsigned get_replica_nonce() const { return replica_nonce; }
300 void set_replica_nonce(unsigned n) { replica_nonce = n; }
301
302
303 // ---------------------------------------------
304 // waiting
305 private:
306 alloc_ptr<mempool::mds_co::multimap<uint64_t, std::pair<uint64_t, MDSInternalContextBase*>>> waiting;
307 static uint64_t last_wait_seq;
308
309 public:
310 bool is_waiter_for(uint64_t mask, uint64_t min=0) {
311 if (!min) {
312 min = mask;
313 while (min & (min-1)) // if more than one bit is set
314 min &= min-1; // clear LSB
315 }
316 if (waiting) {
317 for (auto p = waiting->lower_bound(min); p != waiting->end(); ++p) {
318 if (p->first & mask) return true;
319 if (p->first > mask) return false;
320 }
321 }
322 return false;
323 }
324 virtual void add_waiter(uint64_t mask, MDSInternalContextBase *c) {
325 if (waiting->empty())
326 get(PIN_WAITER);
327
328 uint64_t seq = 0;
329 if (mask & WAIT_ORDERED) {
330 seq = ++last_wait_seq;
331 mask &= ~WAIT_ORDERED;
332 }
333 waiting->insert(pair<uint64_t, pair<uint64_t, MDSInternalContextBase*> >(
334 mask,
335 pair<uint64_t, MDSInternalContextBase*>(seq, c)));
336 // pdout(10,g_conf->debug_mds) << (mdsco_db_line_prefix(this))
337 // << "add_waiter " << hex << mask << dec << " " << c
338 // << " on " << *this
339 // << dendl;
340
341 }
342 virtual void take_waiting(uint64_t mask, std::list<MDSInternalContextBase*>& ls) {
343 if (!waiting || waiting->empty()) return;
344
345 // process ordered waiters in the same order that they were added.
346 std::map<uint64_t, MDSInternalContextBase*> ordered_waiters;
347
348 for (auto it = waiting->begin(); it != waiting->end(); ) {
349 if (it->first & mask) {
350 if (it->second.first > 0) {
351 ordered_waiters.insert(it->second);
352 } else {
353 ls.push_back(it->second.second);
354 }
355 // pdout(10,g_conf->debug_mds) << (mdsco_db_line_prefix(this))
356 // << "take_waiting mask " << hex << mask << dec << " took " << it->second
357 // << " tag " << hex << it->first << dec
358 // << " on " << *this
359 // << dendl;
360 waiting->erase(it++);
361 } else {
362 // pdout(10,g_conf->debug_mds) << "take_waiting mask " << hex << mask << dec << " SKIPPING " << it->second
363 // << " tag " << hex << it->first << dec
364 // << " on " << *this
365 // << dendl;
366 ++it;
367 }
368 }
369 for (auto it = ordered_waiters.begin(); it != ordered_waiters.end(); ++it) {
370 ls.push_back(it->second);
371 }
372 if (waiting->empty()) {
373 put(PIN_WAITER);
374 waiting.release();
375 }
376 }
377 void finish_waiting(uint64_t mask, int result = 0);
378
379 // ---------------------------------------------
380 // locking
381 // noop unless overloaded.
382 virtual SimpleLock* get_lock(int type) { ceph_abort(); return 0; }
383 virtual void set_object_info(MDSCacheObjectInfo &info) { ceph_abort(); }
384 virtual void encode_lock_state(int type, bufferlist& bl) { ceph_abort(); }
385 virtual void decode_lock_state(int type, bufferlist& bl) { ceph_abort(); }
386 virtual void finish_lock_waiters(int type, uint64_t mask, int r=0) { ceph_abort(); }
387 virtual void add_lock_waiter(int type, uint64_t mask, MDSInternalContextBase *c) { ceph_abort(); }
388 virtual bool is_lock_waiting(int type, uint64_t mask) { ceph_abort(); return false; }
389
390 virtual void clear_dirty_scattered(int type) { ceph_abort(); }
391
392 // ---------------------------------------------
393 // ordering
394 virtual bool is_lt(const MDSCacheObject *r) const = 0;
395 struct ptr_lt {
396 bool operator()(const MDSCacheObject* l, const MDSCacheObject* r) const {
397 return l->is_lt(r);
398 }
399 };
400
401 };
402
403 inline std::ostream& operator<<(std::ostream& out, MDSCacheObject &o) {
404 o.print(out);
405 return out;
406 }
407
408 inline std::ostream& operator<<(std::ostream& out, const mdsco_db_line_prefix& o) {
409 o.object->print_db_line_prefix(out);
410 return out;
411 }
412
413 #endif