]> git.proxmox.com Git - ceph.git/blob - ceph/src/mds/MDSCacheObject.h
update sources to 12.2.10
[ceph.git] / ceph / src / mds / MDSCacheObject.h
1 #ifndef CEPH_MDSCACHEOBJECT_H
2 #define CEPH_MDSCACHEOBJECT_H
3
#include <list>
#include <map>
#include <ostream>
#include <set>
#include <utility>

#include "common/config.h"

#include "include/Context.h"
#include "include/alloc_ptr.h"
#include "include/assert.h"
#include "include/mempool.h"
#include "include/types.h"
#include "include/xlist.h"

#include "mdstypes.h"
16
17 #define MDS_REF_SET // define me for improved debug output, sanity checking
18 //#define MDS_AUTHPIN_SET // define me for debugging auth pin leaks
19 //#define MDS_VERIFY_FRAGSTAT // do (slow) sanity checking on frags
20
21
22 class MLock;
23 class SimpleLock;
24 class MDSCacheObject;
25 class MDSInternalContextBase;
26
27 /*
28 * for metadata leases to clients
29 */
30 struct ClientLease {
31 client_t client;
32 MDSCacheObject *parent;
33
34 ceph_seq_t seq = 0;
35 utime_t ttl;
36 xlist<ClientLease*>::item item_session_lease; // per-session list
37 xlist<ClientLease*>::item item_lease; // global list
38
39 ClientLease(client_t c, MDSCacheObject *p) :
40 client(c), parent(p),
41 item_session_lease(this),
42 item_lease(this) { }
43 ClientLease() = delete;
44 };
45
46
47 // print hack
48 struct mdsco_db_line_prefix {
49 MDSCacheObject *object;
50 explicit mdsco_db_line_prefix(MDSCacheObject *o) : object(o) {}
51 };
52 std::ostream& operator<<(std::ostream& out, const mdsco_db_line_prefix& o);
53
54 // printer
55 std::ostream& operator<<(std::ostream& out, const MDSCacheObject &o);
56
57 class MDSCacheObject {
58 public:
59 // -- pins --
60 const static int PIN_REPLICATED = 1000;
61 const static int PIN_DIRTY = 1001;
62 const static int PIN_LOCK = -1002;
63 const static int PIN_REQUEST = -1003;
64 const static int PIN_WAITER = 1004;
65 const static int PIN_DIRTYSCATTERED = -1005;
66 static const int PIN_AUTHPIN = 1006;
67 static const int PIN_PTRWAITER = -1007;
68 const static int PIN_TEMPEXPORTING = 1008; // temp pin between encode_ and finish_export
69 static const int PIN_CLIENTLEASE = 1009;
70 static const int PIN_DISCOVERBASE = 1010;
71
72 const char *generic_pin_name(int p) const {
73 switch (p) {
74 case PIN_REPLICATED: return "replicated";
75 case PIN_DIRTY: return "dirty";
76 case PIN_LOCK: return "lock";
77 case PIN_REQUEST: return "request";
78 case PIN_WAITER: return "waiter";
79 case PIN_DIRTYSCATTERED: return "dirtyscattered";
80 case PIN_AUTHPIN: return "authpin";
81 case PIN_PTRWAITER: return "ptrwaiter";
82 case PIN_TEMPEXPORTING: return "tempexporting";
83 case PIN_CLIENTLEASE: return "clientlease";
84 case PIN_DISCOVERBASE: return "discoverbase";
85 default: ceph_abort(); return 0;
86 }
87 }
88
89 // -- state --
90 const static int STATE_AUTH = (1<<30);
91 const static int STATE_DIRTY = (1<<29);
92 const static int STATE_NOTIFYREF = (1<<28); // notify dropping ref drop through _put()
93 const static int STATE_REJOINING = (1<<27); // replica has not joined w/ primary copy
94 const static int STATE_REJOINUNDEF = (1<<26); // contents undefined.
95
96
97 // -- wait --
98 const static uint64_t WAIT_ORDERED = (1ull<<61);
99 const static uint64_t WAIT_SINGLEAUTH = (1ull<<60);
100 const static uint64_t WAIT_UNFREEZE = (1ull<<59); // pka AUTHPINNABLE
101
102
103 // ============================================
104 // cons
105 public:
106 MDSCacheObject() {}
107 virtual ~MDSCacheObject() {}
108
109 // printing
110 virtual void print(std::ostream& out) = 0;
111 virtual std::ostream& print_db_line_prefix(std::ostream& out) {
112 return out << "mdscacheobject(" << this << ") ";
113 }
114
115 // --------------------------------------------
116 // state
117 protected:
118 __u32 state = 0; // state bits
119
120 public:
121 unsigned get_state() const { return state; }
122 unsigned state_test(unsigned mask) const { return (state & mask); }
123 void state_clear(unsigned mask) { state &= ~mask; }
124 void state_set(unsigned mask) { state |= mask; }
125 void state_reset(unsigned s) { state = s; }
126
127 bool is_auth() const { return state_test(STATE_AUTH); }
128 bool is_dirty() const { return state_test(STATE_DIRTY); }
129 bool is_clean() const { return !is_dirty(); }
130 bool is_rejoining() const { return state_test(STATE_REJOINING); }
131
132 // --------------------------------------------
133 // authority
134 virtual mds_authority_t authority() const = 0;
135 bool is_ambiguous_auth() const {
136 return authority().second != CDIR_AUTH_UNKNOWN;
137 }
138
139 // --------------------------------------------
140 // pins
141 protected:
142 __s32 ref = 0; // reference count
143 #ifdef MDS_REF_SET
144 mempool::mds_co::map<int,int> ref_map;
145 #endif
146
147 public:
148 int get_num_ref(int by = -1) const {
149 #ifdef MDS_REF_SET
150 if (by >= 0) {
151 if (ref_map.find(by) == ref_map.end()) {
152 return 0;
153 } else {
154 return ref_map.find(by)->second;
155 }
156 }
157 #endif
158 return ref;
159 }
160 virtual const char *pin_name(int by) const = 0;
161 //bool is_pinned_by(int by) { return ref_set.count(by); }
162 //multiset<int>& get_ref_set() { return ref_set; }
163
164 virtual void last_put() {}
165 virtual void bad_put(int by) {
166 #ifdef MDS_REF_SET
167 assert(ref_map[by] > 0);
168 #endif
169 assert(ref > 0);
170 }
171 virtual void _put() {}
172 void put(int by) {
173 #ifdef MDS_REF_SET
174 if (ref == 0 || ref_map[by] == 0) {
175 #else
176 if (ref == 0) {
177 #endif
178 bad_put(by);
179 } else {
180 ref--;
181 #ifdef MDS_REF_SET
182 ref_map[by]--;
183 #endif
184 if (ref == 0)
185 last_put();
186 if (state_test(STATE_NOTIFYREF))
187 _put();
188 }
189 }
190
191 virtual void first_get() {}
192 virtual void bad_get(int by) {
193 #ifdef MDS_REF_SET
194 assert(by < 0 || ref_map[by] == 0);
195 #endif
196 ceph_abort();
197 }
198 void get(int by) {
199 if (ref == 0)
200 first_get();
201 ref++;
202 #ifdef MDS_REF_SET
203 if (ref_map.find(by) == ref_map.end())
204 ref_map[by] = 0;
205 ref_map[by]++;
206 #endif
207 }
208
209 void print_pin_set(std::ostream& out) const {
210 #ifdef MDS_REF_SET
211 std::map<int, int>::const_iterator it = ref_map.begin();
212 while (it != ref_map.end()) {
213 out << " " << pin_name(it->first) << "=" << it->second;
214 ++it;
215 }
216 #else
217 out << " nref=" << ref;
218 #endif
219 }
220
221 protected:
222 int auth_pins = 0;
223 int nested_auth_pins = 0;
224 #ifdef MDS_AUTHPIN_SET
225 mempool::mds_co::multiset<void*> auth_pin_set;
226 #endif
227
228 public:
229 bool is_auth_pinned() const { return auth_pins || nested_auth_pins; }
230 int get_num_auth_pins() const { return auth_pins; }
231 int get_num_nested_auth_pins() const { return nested_auth_pins; }
232
233 void dump_states(Formatter *f) const;
234 void dump(Formatter *f) const;
235
236 // --------------------------------------------
237 // auth pins
238 enum {
239 // can_auth_pin() error codes
240 ERR_NOT_AUTH = 1,
241 ERR_EXPORTING_TREE,
242 ERR_FRAGMENTING_DIR,
243 ERR_EXPORTING_INODE,
244 };
245 virtual bool can_auth_pin(int *err_code=nullptr) const = 0;
246 virtual void auth_pin(void *who) = 0;
247 virtual void auth_unpin(void *who) = 0;
248 virtual bool is_frozen() const = 0;
249 virtual bool is_freezing() const = 0;
250 virtual bool is_freezing_or_frozen() const {
251 return is_frozen() || is_freezing();
252 }
253
254
255 // --------------------------------------------
256 // replication (across mds cluster)
257 protected:
258 unsigned replica_nonce = 0; // [replica] defined on replica
259 typedef mempool::mds_co::compact_map<mds_rank_t,unsigned> replica_map_type;
260 replica_map_type replica_map; // [auth] mds -> nonce
261
262 public:
263 bool is_replicated() const { return !get_replicas().empty(); }
264 bool is_replica(mds_rank_t mds) const { return get_replicas().count(mds); }
265 int num_replicas() const { return get_replicas().size(); }
266 unsigned add_replica(mds_rank_t mds) {
267 if (get_replicas().count(mds))
268 return ++get_replicas()[mds]; // inc nonce
269 if (get_replicas().empty())
270 get(PIN_REPLICATED);
271 return get_replicas()[mds] = 1;
272 }
273 void add_replica(mds_rank_t mds, unsigned nonce) {
274 if (get_replicas().empty())
275 get(PIN_REPLICATED);
276 get_replicas()[mds] = nonce;
277 }
278 unsigned get_replica_nonce(mds_rank_t mds) {
279 assert(get_replicas().count(mds));
280 return get_replicas()[mds];
281 }
282 void remove_replica(mds_rank_t mds) {
283 assert(get_replicas().count(mds));
284 get_replicas().erase(mds);
285 if (get_replicas().empty()) {
286 put(PIN_REPLICATED);
287 }
288 }
289 void clear_replica_map() {
290 if (!get_replicas().empty())
291 put(PIN_REPLICATED);
292 replica_map.clear();
293 }
294 replica_map_type& get_replicas() { return replica_map; }
295 const replica_map_type& get_replicas() const { return replica_map; }
296 void list_replicas(std::set<mds_rank_t>& ls) const {
297 for (const auto &p : get_replicas()) {
298 ls.insert(p.first);
299 }
300 }
301
302 unsigned get_replica_nonce() const { return replica_nonce; }
303 void set_replica_nonce(unsigned n) { replica_nonce = n; }
304
305
306 // ---------------------------------------------
307 // waiting
308 private:
309 alloc_ptr<mempool::mds_co::multimap<uint64_t, std::pair<uint64_t, MDSInternalContextBase*>>> waiting;
310 static uint64_t last_wait_seq;
311
312 public:
313 bool is_waiter_for(uint64_t mask, uint64_t min=0) {
314 if (!min) {
315 min = mask;
316 while (min & (min-1)) // if more than one bit is set
317 min &= min-1; // clear LSB
318 }
319 if (waiting) {
320 for (auto p = waiting->lower_bound(min); p != waiting->end(); ++p) {
321 if (p->first & mask) return true;
322 if (p->first > mask) return false;
323 }
324 }
325 return false;
326 }
327 virtual void add_waiter(uint64_t mask, MDSInternalContextBase *c) {
328 if (waiting->empty())
329 get(PIN_WAITER);
330
331 uint64_t seq = 0;
332 if (mask & WAIT_ORDERED) {
333 seq = ++last_wait_seq;
334 mask &= ~WAIT_ORDERED;
335 }
336 waiting->insert(pair<uint64_t, pair<uint64_t, MDSInternalContextBase*> >(
337 mask,
338 pair<uint64_t, MDSInternalContextBase*>(seq, c)));
339 // pdout(10,g_conf->debug_mds) << (mdsco_db_line_prefix(this))
340 // << "add_waiter " << hex << mask << dec << " " << c
341 // << " on " << *this
342 // << dendl;
343
344 }
345 virtual void take_waiting(uint64_t mask, std::list<MDSInternalContextBase*>& ls) {
346 if (!waiting || waiting->empty()) return;
347
348 // process ordered waiters in the same order that they were added.
349 std::map<uint64_t, MDSInternalContextBase*> ordered_waiters;
350
351 for (auto it = waiting->begin(); it != waiting->end(); ) {
352 if (it->first & mask) {
353 if (it->second.first > 0) {
354 ordered_waiters.insert(it->second);
355 } else {
356 ls.push_back(it->second.second);
357 }
358 // pdout(10,g_conf->debug_mds) << (mdsco_db_line_prefix(this))
359 // << "take_waiting mask " << hex << mask << dec << " took " << it->second
360 // << " tag " << hex << it->first << dec
361 // << " on " << *this
362 // << dendl;
363 waiting->erase(it++);
364 } else {
365 // pdout(10,g_conf->debug_mds) << "take_waiting mask " << hex << mask << dec << " SKIPPING " << it->second
366 // << " tag " << hex << it->first << dec
367 // << " on " << *this
368 // << dendl;
369 ++it;
370 }
371 }
372 for (auto it = ordered_waiters.begin(); it != ordered_waiters.end(); ++it) {
373 ls.push_back(it->second);
374 }
375 if (waiting->empty()) {
376 put(PIN_WAITER);
377 waiting.reset();
378 }
379 }
380 void finish_waiting(uint64_t mask, int result = 0);
381
382 // ---------------------------------------------
383 // locking
384 // noop unless overloaded.
385 virtual SimpleLock* get_lock(int type) { ceph_abort(); return 0; }
386 virtual void set_object_info(MDSCacheObjectInfo &info) { ceph_abort(); }
387 virtual void encode_lock_state(int type, bufferlist& bl) { ceph_abort(); }
388 virtual void decode_lock_state(int type, bufferlist& bl) { ceph_abort(); }
389 virtual void finish_lock_waiters(int type, uint64_t mask, int r=0) { ceph_abort(); }
390 virtual void add_lock_waiter(int type, uint64_t mask, MDSInternalContextBase *c) { ceph_abort(); }
391 virtual bool is_lock_waiting(int type, uint64_t mask) { ceph_abort(); return false; }
392
393 virtual void clear_dirty_scattered(int type) { ceph_abort(); }
394
395 // ---------------------------------------------
396 // ordering
397 virtual bool is_lt(const MDSCacheObject *r) const = 0;
398 struct ptr_lt {
399 bool operator()(const MDSCacheObject* l, const MDSCacheObject* r) const {
400 return l->is_lt(r);
401 }
402 };
403
404 };
405
406 inline std::ostream& operator<<(std::ostream& out, MDSCacheObject &o) {
407 o.print(out);
408 return out;
409 }
410
411 inline std::ostream& operator<<(std::ostream& out, const mdsco_db_line_prefix& o) {
412 o.object->print_db_line_prefix(out);
413 return out;
414 }
415
416 #endif