]>
git.proxmox.com Git - ceph.git/blob - ceph/src/mds/MDSCacheObject.h
1bc80cfed67307f6f22792f44e1d397e35022907
1 #ifndef CEPH_MDSCACHEOBJECT_H
2 #define CEPH_MDSCACHEOBJECT_H
10 #include "common/config.h"
11 #include "include/assert.h"
12 #include "include/types.h"
13 #include "include/xlist.h"
14 #include "include/Context.h"
17 #define MDS_REF_SET // define me for improved debug output, sanity checking
18 //#define MDS_AUTHPIN_SET // define me for debugging auth pin leaks
19 //#define MDS_VERIFY_FRAGSTAT // do (slow) sanity checking on frags
25 class MDSInternalContextBase
;
28 * for metadata leases to clients
32 MDSCacheObject
*parent
;
36 xlist
<ClientLease
*>::item item_session_lease
; // per-session list
37 xlist
<ClientLease
*>::item item_lease
; // global list
39 ClientLease(client_t c
, MDSCacheObject
*p
) :
40 client(c
), parent(p
), seq(0),
41 item_session_lease(this),
// Stream helper: wraps an MDSCacheObject pointer so that streaming the wrapper
// (via the operator<< overload taking a mdsco_db_line_prefix) calls that
// object's print_db_line_prefix().
47 struct mdsco_db_line_prefix
{
// Object whose prefix is printed; stored as a raw pointer, not owned here.
48 MDSCacheObject
*object
;
49 explicit mdsco_db_line_prefix(MDSCacheObject
*o
) : object(o
) {}
// Stream insertion for the debug-line prefix wrapper; the inline definition
// further down forwards to o.object->print_db_line_prefix(out).
51 std::ostream
& operator<<(std::ostream
& out
, const mdsco_db_line_prefix
& o
);
// Stream insertion for a const MDSCacheObject.
// NOTE(review): the inline operator<< visible later in this header takes a
// non-const MDSCacheObject&; confirm where this const overload is defined.
54 std::ostream
& operator<<(std::ostream
& out
, const MDSCacheObject
&o
);
56 class MDSCacheObject
{
// Generic pin ids. These are the `by` values that key ref_map and that
// generic_pin_name() translates into display names.
// NOTE(review): several ids are negative. bad_get() only asserts
// ref_map[by] == 0 for non-negative ids, so negative ids presumably mark pins
// that may be held more than once at a time -- confirm.
59 const static int PIN_REPLICATED
= 1000;
60 const static int PIN_DIRTY
= 1001;
61 const static int PIN_LOCK
= -1002;
62 const static int PIN_REQUEST
= -1003;
63 const static int PIN_WAITER
= 1004;
64 const static int PIN_DIRTYSCATTERED
= -1005;
65 static const int PIN_AUTHPIN
= 1006;
66 static const int PIN_PTRWAITER
= -1007;
67 const static int PIN_TEMPEXPORTING
= 1008; // temp pin between encode_ and finish_export
68 static const int PIN_CLIENTLEASE
= 1009;
69 static const int PIN_DISCOVERBASE
= 1010;
// Returns the display name for one of the generic PIN_* ids.
// Any value not listed below reaches the default case, which calls
// ceph_abort(); the trailing `return 0` only satisfies the return type.
71 const char *generic_pin_name(int p
) const {
73 case PIN_REPLICATED
: return "replicated";
74 case PIN_DIRTY
: return "dirty";
75 case PIN_LOCK
: return "lock";
76 case PIN_REQUEST
: return "request";
77 case PIN_WAITER
: return "waiter";
78 case PIN_DIRTYSCATTERED
: return "dirtyscattered";
79 case PIN_AUTHPIN
: return "authpin";
80 case PIN_PTRWAITER
: return "ptrwaiter";
81 case PIN_TEMPEXPORTING
: return "tempexporting";
82 case PIN_CLIENTLEASE
: return "clientlease";
83 case PIN_DISCOVERBASE
: return "discoverbase";
84 default: ceph_abort(); return 0;
// Generic state bits kept in `state`; queried and changed through
// state_test(), state_set() and state_clear().
89 const static int STATE_AUTH
= (1<<30);
90 const static int STATE_DIRTY
= (1<<29);
91 const static int STATE_NOTIFYREF
= (1<<28); // notify through _put() when a ref is dropped
92 const static int STATE_REJOINING
= (1<<27); // replica has not joined w/ primary copy
93 const static int STATE_REJOINUNDEF
= (1<<26); // contents undefined.
// Generic wait-mask bits (64-bit) used with add_waiter()/take_waiting().
// WAIT_ORDERED is a modifier rather than an event: add_waiter() strips it
// from the mask and assigns the waiter a sequence number instead.
97 const static uint64_t WAIT_ORDERED
= (1ull<<61);
98 const static uint64_t WAIT_SINGLEAUTH
= (1ull<<60);
99 const static uint64_t WAIT_UNFREEZE
= (1ull<<59); // previously known as AUTHPINNABLE
102 // ============================================
108 auth_pins(0), nested_auth_pins(0),
// Virtual, empty destructor: the class declares pure virtual methods and is
// meant to be used through derived types.
111 virtual ~MDSCacheObject() {}
// Subclass hook: write a description of this object to `out`.
// Non-const, which is why the inline operator<< further down takes a
// non-const MDSCacheObject&.
114 virtual void print(std::ostream
& out
) = 0;
// Writes the default debug-line prefix "mdscacheobject(<this pointer>) " to
// `out` and returns the stream. Virtual so subclasses can supply their own.
115 virtual std::ostream
& print_db_line_prefix(std::ostream
& out
) {
116 return out
<< "mdscacheobject(" << this << ") ";
119 // --------------------------------------------
// 32-bit word holding the STATE_* flags of this object.
122 __u32 state
; // state bits
// Returns the whole state word.
125 unsigned get_state() const { return state
; }
// Returns the bits of `mask` that are currently set (non-zero if any is set).
126 unsigned state_test(unsigned mask
) const { return (state
& mask
); }
// Clears every bit present in `mask`.
127 void state_clear(unsigned mask
) { state
&= ~mask
; }
// Sets every bit present in `mask`.
128 void state_set(unsigned mask
) { state
|= mask
; }
// Replaces the whole state word with `s`.
129 void state_reset(unsigned s
) { state
= s
; }
// Convenience predicates over single STATE_* bits.
131 bool is_auth() const { return state_test(STATE_AUTH
); }
132 bool is_dirty() const { return state_test(STATE_DIRTY
); }
// Exact negation of is_dirty().
133 bool is_clean() const { return !is_dirty(); }
134 bool is_rejoining() const { return state_test(STATE_REJOINING
); }
136 // --------------------------------------------
// Subclass hook: returns this object's authority as an mds_authority_t.
138 virtual mds_authority_t
authority() const = 0;
// True when the `.second` member of authority() is anything other than
// CDIR_AUTH_UNKNOWN.
139 bool is_ambiguous_auth() const {
140 return authority().second
!= CDIR_AUTH_UNKNOWN
;
143 // --------------------------------------------
// Total reference count; printed as "nref=" by print_pin_set().
146 __s32 ref
; // reference count
// Count per pin id (`by`); read by get_num_ref(by) and print_pin_set().
// NOTE(review): the surrounding preprocessor lines are not visible in this
// rendering; this map may only exist when MDS_REF_SET is defined -- confirm.
148 std::map
<int,int> ref_map
;
// Returns a reference count. For a specific pin id `by`, the visible code
// looks the id up in ref_map and returns its stored count.
// NOTE(review): lines are missing from this rendering; the handling of the
// default by == -1 and of an id that is absent from ref_map is not visible.
152 int get_num_ref(int by
= -1) const {
155 if (ref_map
.find(by
) == ref_map
.end()) {
158 return ref_map
.find(by
)->second
;
// Subclass hook: display name for pin id `by`; print_pin_set() calls it for
// every key in ref_map.
164 virtual const char *pin_name(int by
) const = 0;
165 //bool is_pinned_by(int by) { return ref_set.count(by); }
166 //multiset<int>& get_ref_set() { return ref_set; }
// Hooks around dropping a reference. The defaults of last_put() and _put()
// do nothing.
// NOTE(review): the put() method that calls these is only partly visible in
// this rendering, so the exact call conditions cannot be confirmed here.
168 virtual void last_put() {}
// Default handler for an invalid put of pin `by`: asserts that the per-pin
// count is positive (other lines of the body are missing from this rendering).
169 virtual void bad_put(int by
) {
171 assert(ref_map
[by
] > 0);
175 virtual void _put() {}
178 if (ref
== 0 || ref_map
[by
] == 0) {
190 if (state_test(STATE_NOTIFYREF
))
// Hooks around taking a reference; the default first_get() does nothing.
// NOTE(review): the get() method that calls these is only partly visible in
// this rendering.
195 virtual void first_get() {}
// Default handler for an invalid get of pin `by`: asserts that either the id
// is negative or no reference is currently recorded for it (other lines of
// the body are missing from this rendering).
196 virtual void bad_get(int by
) {
198 assert(by
< 0 || ref_map
[by
] == 0);
207 if (ref_map
.find(by
) == ref_map
.end())
// Writes " <pin name>=<count>" for each entry of ref_map (names come from
// pin_name()), then " nref=<ref>".
// NOTE(review): lines are missing from this rendering (e.g. the iterator
// advance and any preprocessor guards), so this describes only what is shown.
213 void print_pin_set(std::ostream
& out
) const {
215 std::map
<int, int>::const_iterator it
= ref_map
.begin();
216 while (it
!= ref_map
.end()) {
217 out
<< " " << pin_name(it
->first
) << "=" << it
->second
;
221 out
<< " nref=" << ref
;
// Second auth-pin counter, reported by get_num_nested_auth_pins() and
// included in is_auth_pinned().
227 int nested_auth_pins
;
// auth_pin_set is only compiled in when MDS_AUTHPIN_SET is defined.
// NOTE(review): presumably records the `who` pointers passed to auth_pin() --
// confirm in the subclasses.
228 #ifdef MDS_AUTHPIN_SET
229 multiset
<void*> auth_pin_set
;
// True if either auth_pins or nested_auth_pins is non-zero.
233 bool is_auth_pinned() const { return auth_pins
|| nested_auth_pins
; }
// Plain accessors for the two auth-pin counters.
234 int get_num_auth_pins() const { return auth_pins
; }
235 int get_num_nested_auth_pins() const { return nested_auth_pins
; }
// Dump helpers writing into a Formatter; declared here only, the definitions
// are not part of this header as shown.
237 void dump_states(Formatter
*f
) const;
238 void dump(Formatter
*f
) const;
240 // --------------------------------------------
// Auth-pin and freeze interface that subclasses must implement.
// `who` is an opaque pointer naming the pinner.
242 virtual bool can_auth_pin() const = 0;
243 virtual void auth_pin(void *who
) = 0;
244 virtual void auth_unpin(void *who
) = 0;
245 virtual bool is_frozen() const = 0;
246 virtual bool is_freezing() const = 0;
// Default: the logical OR of is_frozen() and is_freezing(); virtual so a
// subclass can replace it.
247 virtual bool is_freezing_or_frozen() const {
248 return is_frozen() || is_freezing();
252 // --------------------------------------------
253 // replication (across mds cluster)
// Replication bookkeeping. Per the original tags, replica_nonce is meaningful
// on a replica copy and replica_map on the authoritative copy.
255 unsigned replica_nonce
; // [replica] defined on replica
256 compact_map
<mds_rank_t
,unsigned> replica_map
; // [auth] mds -> nonce
// True when at least one replica is recorded in replica_map.
259 bool is_replicated() const { return !replica_map
.empty(); }
// True when `mds` has an entry in replica_map.
260 bool is_replica(mds_rank_t mds
) const { return replica_map
.count(mds
); }
// Number of recorded replicas (the container size converted to int).
261 int num_replicas() const { return replica_map
.size(); }
// Registers `mds` as a replica holder and returns its nonce: an existing
// entry has its nonce incremented, a new entry starts at 1.
// NOTE(review): the statement guarded by `if (replica_map.empty())` is
// missing from this rendering; presumably it takes a pin when the first
// replica is added -- confirm.
262 unsigned add_replica(mds_rank_t mds
) {
263 if (replica_map
.count(mds
))
264 return ++replica_map
[mds
]; // inc nonce
265 if (replica_map
.empty())
267 return replica_map
[mds
] = 1;
// Registers `mds` with an explicit nonce, overwriting any existing entry.
// NOTE(review): the statement guarded by `if (replica_map.empty())` is
// missing from this rendering.
269 void add_replica(mds_rank_t mds
, unsigned nonce
) {
270 if (replica_map
.empty())
272 replica_map
[mds
] = nonce
;
// Returns the nonce recorded for `mds`; asserts that `mds` is a known replica.
// Non-const because the lookup goes through operator[].
274 unsigned get_replica_nonce(mds_rank_t mds
) {
275 assert(replica_map
.count(mds
));
276 return replica_map
[mds
];
// Removes `mds` from replica_map; asserts that it was present.
// NOTE(review): the statement run when the map becomes empty is missing from
// this rendering.
278 void remove_replica(mds_rank_t mds
) {
279 assert(replica_map
.count(mds
));
280 replica_map
.erase(mds
);
281 if (replica_map
.empty())
// Drops all replica records.
// NOTE(review): only the non-empty check is visible in this rendering; the
// statements that follow it are missing.
284 void clear_replica_map() {
285 if (!replica_map
.empty())
// Mutable iterators over replica_map (mds rank -> nonce).
289 compact_map
<mds_rank_t
,unsigned>::iterator
replicas_begin() { return replica_map
.begin(); }
290 compact_map
<mds_rank_t
,unsigned>::iterator
replicas_end() { return replica_map
.end(); }
// Read-only access to the whole replica map.
291 const compact_map
<mds_rank_t
,unsigned>& get_replicas() const { return replica_map
; }
// Iterates over replica_map from begin() to end().
// NOTE(review): the loop body is missing from this rendering; presumably each
// replica's mds rank is inserted into `ls` -- confirm.
292 void list_replicas(std::set
<mds_rank_t
>& ls
) const {
293 for (compact_map
<mds_rank_t
,unsigned>::const_iterator p
= replica_map
.begin();
294 p
!= replica_map
.end();
// Accessors for this object's own replica_nonce. This zero-argument getter is
// distinct from the get_replica_nonce(mds_rank_t) overload, which reads
// replica_map.
299 unsigned get_replica_nonce() const { return replica_nonce
; }
300 void set_replica_nonce(unsigned n
) { replica_nonce
= n
; }
303 // ---------------------------------------------
// Waiters keyed by wait mask; each value is (sequence number, context).
// take_waiting() treats entries with a sequence number > 0 as ordered.
306 compact_multimap
<uint64_t, pair
<uint64_t, MDSInternalContextBase
*> > waiting
;
// Class-wide counter; add_waiter() pre-increments it to number WAIT_ORDERED
// waiters.
307 static uint64_t last_wait_seq
;
// Reports whether some waiter's key shares a bit with `mask`.
// `min` is first reduced to its highest set bit (the loop clears the lowest
// set bit until one remains); the scan then starts at waiting.lower_bound(min).
// The scan returns true at the first key that overlaps `mask` and returns
// false as soon as a key is numerically greater than `mask`.
// NOTE(review): the loop condition/advance and the final return are missing
// from this rendering.
310 bool is_waiter_for(uint64_t mask
, uint64_t min
=0) {
313 while (min
& (min
-1)) // if more than one bit is set
314 min
&= min
-1; // clear LSB
316 for (auto p
= waiting
.lower_bound(min
);
319 if (p
->first
& mask
) return true;
320 if (p
->first
> mask
) return false;
// Registers context `c` in `waiting` under `mask`.
// If WAIT_ORDERED is set in `mask`, the waiter gets the next value of
// last_wait_seq as its sequence number and the flag is removed from the mask
// before insertion.
// NOTE(review): lines are missing from this rendering -- the declaration and
// initial value of `seq` (presumably 0 for unordered waiters) and the key
// argument of the insert (presumably `mask`) are not visible; confirm.
324 virtual void add_waiter(uint64_t mask
, MDSInternalContextBase
*c
) {
329 if (mask
& WAIT_ORDERED
) {
330 seq
= ++last_wait_seq
;
331 mask
&= ~WAIT_ORDERED
;
333 waiting
.insert(pair
<uint64_t, pair
<uint64_t, MDSInternalContextBase
*> >(
335 pair
<uint64_t, MDSInternalContextBase
*>(seq
, c
)));
336 // pdout(10,g_conf->debug_mds) << (mdsco_db_line_prefix(this))
337 // << "add_waiter " << hex << mask << dec << " " << c
338 // << " on " << *this
// Collects into `ls` the contexts of waiters whose key shares a bit with
// `mask`. Returns immediately when there are no waiters.
// Matching waiters with a sequence number > 0 are first gathered in a
// std::map keyed by that sequence, then appended to `ls` in ascending sequence
// order; other matching waiters are pushed onto `ls` directly.
// NOTE(review): lines are missing from this rendering -- the `else` that
// presumably separates the two branches, the removal of taken entries from
// `waiting`, and the iterator advances are not visible; confirm.
342 virtual void take_waiting(uint64_t mask
, list
<MDSInternalContextBase
*>& ls
) {
343 if (waiting
.empty()) return;
345 // process ordered waiters in the same order that they were added.
346 std::map
<uint64_t, MDSInternalContextBase
*> ordered_waiters
;
348 for (auto it
= waiting
.begin();
349 it
!= waiting
.end(); ) {
350 if (it
->first
& mask
) {
352 if (it
->second
.first
> 0)
353 ordered_waiters
.insert(it
->second
);
355 ls
.push_back(it
->second
.second
);
356 // pdout(10,g_conf->debug_mds) << (mdsco_db_line_prefix(this))
357 // << "take_waiting mask " << hex << mask << dec << " took " << it->second
358 // << " tag " << hex << it->first << dec
359 // << " on " << *this
363 // pdout(10,g_conf->debug_mds) << "take_waiting mask " << hex << mask << dec << " SKIPPING " << it->second
364 // << " tag " << hex << it->first << dec
365 // << " on " << *this
370 for (auto it
= ordered_waiters
.begin();
371 it
!= ordered_waiters
.end();
373 ls
.push_back(it
->second
);
// Declared here only; the definition is not part of this header as shown.
// NOTE(review): presumably takes the waiters matching `mask` and completes
// them with `result` -- confirm against the definition.
378 void finish_waiting(uint64_t mask
, int result
= 0);
380 // ---------------------------------------------
382 // not no-ops: every default below calls ceph_abort() unless overridden.
// The `return` statements after ceph_abort() only satisfy the return type.
383 virtual SimpleLock
* get_lock(int type
) { ceph_abort(); return 0; }
384 virtual void set_object_info(MDSCacheObjectInfo
&info
) { ceph_abort(); }
385 virtual void encode_lock_state(int type
, bufferlist
& bl
) { ceph_abort(); }
386 virtual void decode_lock_state(int type
, bufferlist
& bl
) { ceph_abort(); }
387 virtual void finish_lock_waiters(int type
, uint64_t mask
, int r
=0) { ceph_abort(); }
388 virtual void add_lock_waiter(int type
, uint64_t mask
, MDSInternalContextBase
*c
) { ceph_abort(); }
389 virtual bool is_lock_waiting(int type
, uint64_t mask
) { ceph_abort(); return false; }
391 virtual void clear_dirty_scattered(int type
) { ceph_abort(); }
393 // ---------------------------------------------
// Subclass hook: ordering predicate against another cache object `r`.
// NOTE(review): presumably a strict "less than" used by the comparator that
// follows in this header -- confirm.
395 virtual bool is_lt(const MDSCacheObject
*r
) const = 0;
397 bool operator()(const MDSCacheObject
* l
, const MDSCacheObject
* r
) const {
404 inline std::ostream
& operator<<(std::ostream
& out
, MDSCacheObject
&o
) {
409 inline std::ostream
& operator<<(std::ostream
& out
, const mdsco_db_line_prefix
& o
) {
410 o
.object
->print_db_line_prefix(out
);