]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | #ifndef CEPH_MDSCACHEOBJECT_H |
2 | #define CEPH_MDSCACHEOBJECT_H | |
3 | ||
4 | #include <set> | |
5 | #include <map> | |
6 | #include <ostream> | |
7 | using namespace std; | |
8 | ||
9 | ||
10 | #include "common/config.h" | |
11 | #include "include/assert.h" | |
12 | #include "include/types.h" | |
13 | #include "include/xlist.h" | |
14 | #include "include/Context.h" | |
15 | #include "mdstypes.h" | |
16 | ||
17 | #define MDS_REF_SET // define me for improved debug output, sanity checking | |
18 | //#define MDS_AUTHPIN_SET // define me for debugging auth pin leaks | |
19 | //#define MDS_VERIFY_FRAGSTAT // do (slow) sanity checking on frags | |
20 | ||
21 | ||
22 | class MLock; | |
23 | class SimpleLock; | |
24 | class MDSCacheObject; | |
25 | class MDSInternalContextBase; | |
26 | ||
27 | /* | |
28 | * for metadata leases to clients | |
29 | */ | |
30 | struct ClientLease { | |
31 | client_t client; | |
32 | MDSCacheObject *parent; | |
33 | ||
34 | ceph_seq_t seq; | |
35 | utime_t ttl; | |
36 | xlist<ClientLease*>::item item_session_lease; // per-session list | |
37 | xlist<ClientLease*>::item item_lease; // global list | |
38 | ||
39 | ClientLease(client_t c, MDSCacheObject *p) : | |
40 | client(c), parent(p), seq(0), | |
41 | item_session_lease(this), | |
42 | item_lease(this) { } | |
43 | }; | |
44 | ||
45 | ||
46 | // print hack | |
47 | struct mdsco_db_line_prefix { | |
48 | MDSCacheObject *object; | |
49 | explicit mdsco_db_line_prefix(MDSCacheObject *o) : object(o) {} | |
50 | }; | |
31f18b77 | 51 | std::ostream& operator<<(std::ostream& out, const mdsco_db_line_prefix& o); |
7c673cae FG |
52 | |
53 | // printer | |
31f18b77 | 54 | std::ostream& operator<<(std::ostream& out, const MDSCacheObject &o); |
7c673cae FG |
55 | |
56 | class MDSCacheObject { | |
57 | public: | |
58 | // -- pins -- | |
// Pin ids handed to get()/put().  The sign is significant: get_num_ref()
// only reports a per-pin count for ids >= 0, and bad_get() exempts
// negative ids from its check, so the values must not be normalised.
static const int PIN_REPLICATED     =  1000;
static const int PIN_DIRTY          =  1001;
static const int PIN_LOCK           = -1002;
static const int PIN_REQUEST        = -1003;
static const int PIN_WAITER         =  1004;
static const int PIN_DIRTYSCATTERED = -1005;
static const int PIN_AUTHPIN        =  1006;
static const int PIN_PTRWAITER      = -1007;
static const int PIN_TEMPEXPORTING  =  1008;  // temp pin between encode_ and finish_export
static const int PIN_CLIENTLEASE    =  1009;
static const int PIN_DISCOVERBASE   =  1010;
70 | ||
71 | const char *generic_pin_name(int p) const { | |
72 | switch (p) { | |
73 | case PIN_REPLICATED: return "replicated"; | |
74 | case PIN_DIRTY: return "dirty"; | |
75 | case PIN_LOCK: return "lock"; | |
76 | case PIN_REQUEST: return "request"; | |
77 | case PIN_WAITER: return "waiter"; | |
78 | case PIN_DIRTYSCATTERED: return "dirtyscattered"; | |
79 | case PIN_AUTHPIN: return "authpin"; | |
80 | case PIN_PTRWAITER: return "ptrwaiter"; | |
81 | case PIN_TEMPEXPORTING: return "tempexporting"; | |
82 | case PIN_CLIENTLEASE: return "clientlease"; | |
83 | case PIN_DISCOVERBASE: return "discoverbase"; | |
84 | default: ceph_abort(); return 0; | |
85 | } | |
86 | } | |
87 | ||
88 | // -- state -- | |
// Generic state bits stored in `state`, allocated downward from bit 30.
static const int STATE_AUTH        = 1 << 30;
static const int STATE_DIRTY       = 1 << 29;
static const int STATE_NOTIFYREF   = 1 << 28;  // while set, every successful put() also calls _put()
static const int STATE_REJOINING   = 1 << 27;  // replica has not joined w/ primary copy
static const int STATE_REJOINUNDEF = 1 << 26;  // contents undefined.
94 | ||
95 | ||
96 | // -- wait -- | |
// Generic wait-mask bits.
static const uint64_t WAIT_ORDERED    = 1ull << 61;  // add_waiter() strips this bit and gives the waiter a sequence number
static const uint64_t WAIT_SINGLEAUTH = 1ull << 60;
static const uint64_t WAIT_UNFREEZE   = 1ull << 59;  // pka AUTHPINNABLE
100 | ||
101 | ||
102 | // ============================================ | |
103 | // cons | |
104 | public: | |
105 | MDSCacheObject() : | |
106 | state(0), | |
107 | ref(0), | |
108 | auth_pins(0), nested_auth_pins(0), | |
109 | replica_nonce(0) | |
110 | {} | |
111 | virtual ~MDSCacheObject() {} | |
112 | ||
113 | // printing | |
114 | virtual void print(std::ostream& out) = 0; | |
115 | virtual std::ostream& print_db_line_prefix(std::ostream& out) { | |
116 | return out << "mdscacheobject(" << this << ") "; | |
117 | } | |
118 | ||
119 | // -------------------------------------------- | |
120 | // state | |
121 | protected: | |
122 | __u32 state; // state bits | |
123 | ||
124 | public: | |
125 | unsigned get_state() const { return state; } | |
126 | unsigned state_test(unsigned mask) const { return (state & mask); } | |
127 | void state_clear(unsigned mask) { state &= ~mask; } | |
128 | void state_set(unsigned mask) { state |= mask; } | |
129 | void state_reset(unsigned s) { state = s; } | |
130 | ||
131 | bool is_auth() const { return state_test(STATE_AUTH); } | |
132 | bool is_dirty() const { return state_test(STATE_DIRTY); } | |
133 | bool is_clean() const { return !is_dirty(); } | |
134 | bool is_rejoining() const { return state_test(STATE_REJOINING); } | |
135 | ||
136 | // -------------------------------------------- | |
137 | // authority | |
138 | virtual mds_authority_t authority() const = 0; | |
139 | bool is_ambiguous_auth() const { | |
140 | return authority().second != CDIR_AUTH_UNKNOWN; | |
141 | } | |
142 | ||
143 | // -------------------------------------------- | |
144 | // pins | |
145 | protected: | |
146 | __s32 ref; // reference count | |
147 | #ifdef MDS_REF_SET | |
148 | std::map<int,int> ref_map; | |
149 | #endif | |
150 | ||
151 | public: | |
152 | int get_num_ref(int by = -1) const { | |
153 | #ifdef MDS_REF_SET | |
154 | if (by >= 0) { | |
155 | if (ref_map.find(by) == ref_map.end()) { | |
156 | return 0; | |
157 | } else { | |
158 | return ref_map.find(by)->second; | |
159 | } | |
160 | } | |
161 | #endif | |
162 | return ref; | |
163 | } | |
164 | virtual const char *pin_name(int by) const = 0; | |
165 | //bool is_pinned_by(int by) { return ref_set.count(by); } | |
166 | //multiset<int>& get_ref_set() { return ref_set; } | |
167 | ||
168 | virtual void last_put() {} | |
169 | virtual void bad_put(int by) { | |
170 | #ifdef MDS_REF_SET | |
171 | assert(ref_map[by] > 0); | |
172 | #endif | |
173 | assert(ref > 0); | |
174 | } | |
175 | virtual void _put() {} | |
176 | void put(int by) { | |
177 | #ifdef MDS_REF_SET | |
178 | if (ref == 0 || ref_map[by] == 0) { | |
179 | #else | |
180 | if (ref == 0) { | |
181 | #endif | |
182 | bad_put(by); | |
183 | } else { | |
184 | ref--; | |
185 | #ifdef MDS_REF_SET | |
186 | ref_map[by]--; | |
187 | #endif | |
188 | if (ref == 0) | |
189 | last_put(); | |
190 | if (state_test(STATE_NOTIFYREF)) | |
191 | _put(); | |
192 | } | |
193 | } | |
194 | ||
195 | virtual void first_get() {} | |
196 | virtual void bad_get(int by) { | |
197 | #ifdef MDS_REF_SET | |
198 | assert(by < 0 || ref_map[by] == 0); | |
199 | #endif | |
200 | ceph_abort(); | |
201 | } | |
202 | void get(int by) { | |
203 | if (ref == 0) | |
204 | first_get(); | |
205 | ref++; | |
206 | #ifdef MDS_REF_SET | |
207 | if (ref_map.find(by) == ref_map.end()) | |
208 | ref_map[by] = 0; | |
209 | ref_map[by]++; | |
210 | #endif | |
211 | } | |
212 | ||
213 | void print_pin_set(std::ostream& out) const { | |
214 | #ifdef MDS_REF_SET | |
215 | std::map<int, int>::const_iterator it = ref_map.begin(); | |
216 | while (it != ref_map.end()) { | |
217 | out << " " << pin_name(it->first) << "=" << it->second; | |
218 | ++it; | |
219 | } | |
220 | #else | |
221 | out << " nref=" << ref; | |
222 | #endif | |
223 | } | |
224 | ||
225 | protected: | |
226 | int auth_pins; | |
227 | int nested_auth_pins; | |
228 | #ifdef MDS_AUTHPIN_SET | |
229 | multiset<void*> auth_pin_set; | |
230 | #endif | |
231 | ||
232 | public: | |
233 | bool is_auth_pinned() const { return auth_pins || nested_auth_pins; } | |
234 | int get_num_auth_pins() const { return auth_pins; } | |
235 | int get_num_nested_auth_pins() const { return nested_auth_pins; } | |
236 | ||
237 | void dump_states(Formatter *f) const; | |
238 | void dump(Formatter *f) const; | |
239 | ||
240 | // -------------------------------------------- | |
241 | // auth pins | |
242 | virtual bool can_auth_pin() const = 0; | |
243 | virtual void auth_pin(void *who) = 0; | |
244 | virtual void auth_unpin(void *who) = 0; | |
245 | virtual bool is_frozen() const = 0; | |
246 | virtual bool is_freezing() const = 0; | |
247 | virtual bool is_freezing_or_frozen() const { | |
248 | return is_frozen() || is_freezing(); | |
249 | } | |
250 | ||
251 | ||
252 | // -------------------------------------------- | |
253 | // replication (across mds cluster) | |
254 | protected: | |
255 | unsigned replica_nonce; // [replica] defined on replica | |
256 | compact_map<mds_rank_t,unsigned> replica_map; // [auth] mds -> nonce | |
257 | ||
258 | public: | |
259 | bool is_replicated() const { return !replica_map.empty(); } | |
260 | bool is_replica(mds_rank_t mds) const { return replica_map.count(mds); } | |
261 | int num_replicas() const { return replica_map.size(); } | |
262 | unsigned add_replica(mds_rank_t mds) { | |
263 | if (replica_map.count(mds)) | |
264 | return ++replica_map[mds]; // inc nonce | |
265 | if (replica_map.empty()) | |
266 | get(PIN_REPLICATED); | |
267 | return replica_map[mds] = 1; | |
268 | } | |
269 | void add_replica(mds_rank_t mds, unsigned nonce) { | |
270 | if (replica_map.empty()) | |
271 | get(PIN_REPLICATED); | |
272 | replica_map[mds] = nonce; | |
273 | } | |
274 | unsigned get_replica_nonce(mds_rank_t mds) { | |
275 | assert(replica_map.count(mds)); | |
276 | return replica_map[mds]; | |
277 | } | |
278 | void remove_replica(mds_rank_t mds) { | |
279 | assert(replica_map.count(mds)); | |
280 | replica_map.erase(mds); | |
281 | if (replica_map.empty()) | |
282 | put(PIN_REPLICATED); | |
283 | } | |
284 | void clear_replica_map() { | |
285 | if (!replica_map.empty()) | |
286 | put(PIN_REPLICATED); | |
287 | replica_map.clear(); | |
288 | } | |
289 | compact_map<mds_rank_t,unsigned>::iterator replicas_begin() { return replica_map.begin(); } | |
290 | compact_map<mds_rank_t,unsigned>::iterator replicas_end() { return replica_map.end(); } | |
291 | const compact_map<mds_rank_t,unsigned>& get_replicas() const { return replica_map; } | |
292 | void list_replicas(std::set<mds_rank_t>& ls) const { | |
293 | for (compact_map<mds_rank_t,unsigned>::const_iterator p = replica_map.begin(); | |
294 | p != replica_map.end(); | |
295 | ++p) | |
296 | ls.insert(p->first); | |
297 | } | |
298 | ||
299 | unsigned get_replica_nonce() const { return replica_nonce; } | |
300 | void set_replica_nonce(unsigned n) { replica_nonce = n; } | |
301 | ||
302 | ||
303 | // --------------------------------------------- | |
304 | // waiting | |
305 | protected: | |
306 | compact_multimap<uint64_t, pair<uint64_t, MDSInternalContextBase*> > waiting; | |
307 | static uint64_t last_wait_seq; | |
308 | ||
309 | public: | |
310 | bool is_waiter_for(uint64_t mask, uint64_t min=0) { | |
311 | if (!min) { | |
312 | min = mask; | |
313 | while (min & (min-1)) // if more than one bit is set | |
314 | min &= min-1; // clear LSB | |
315 | } | |
316 | for (auto p = waiting.lower_bound(min); | |
317 | p != waiting.end(); | |
318 | ++p) { | |
319 | if (p->first & mask) return true; | |
320 | if (p->first > mask) return false; | |
321 | } | |
322 | return false; | |
323 | } | |
324 | virtual void add_waiter(uint64_t mask, MDSInternalContextBase *c) { | |
325 | if (waiting.empty()) | |
326 | get(PIN_WAITER); | |
327 | ||
328 | uint64_t seq = 0; | |
329 | if (mask & WAIT_ORDERED) { | |
330 | seq = ++last_wait_seq; | |
331 | mask &= ~WAIT_ORDERED; | |
332 | } | |
333 | waiting.insert(pair<uint64_t, pair<uint64_t, MDSInternalContextBase*> >( | |
334 | mask, | |
335 | pair<uint64_t, MDSInternalContextBase*>(seq, c))); | |
336 | // pdout(10,g_conf->debug_mds) << (mdsco_db_line_prefix(this)) | |
337 | // << "add_waiter " << hex << mask << dec << " " << c | |
338 | // << " on " << *this | |
339 | // << dendl; | |
340 | ||
341 | } | |
342 | virtual void take_waiting(uint64_t mask, list<MDSInternalContextBase*>& ls) { | |
343 | if (waiting.empty()) return; | |
344 | ||
345 | // process ordered waiters in the same order that they were added. | |
346 | std::map<uint64_t, MDSInternalContextBase*> ordered_waiters; | |
347 | ||
348 | for (auto it = waiting.begin(); | |
349 | it != waiting.end(); ) { | |
350 | if (it->first & mask) { | |
351 | ||
352 | if (it->second.first > 0) | |
353 | ordered_waiters.insert(it->second); | |
354 | else | |
355 | ls.push_back(it->second.second); | |
356 | // pdout(10,g_conf->debug_mds) << (mdsco_db_line_prefix(this)) | |
357 | // << "take_waiting mask " << hex << mask << dec << " took " << it->second | |
358 | // << " tag " << hex << it->first << dec | |
359 | // << " on " << *this | |
360 | // << dendl; | |
361 | waiting.erase(it++); | |
362 | } else { | |
363 | // pdout(10,g_conf->debug_mds) << "take_waiting mask " << hex << mask << dec << " SKIPPING " << it->second | |
364 | // << " tag " << hex << it->first << dec | |
365 | // << " on " << *this | |
366 | // << dendl; | |
367 | ++it; | |
368 | } | |
369 | } | |
370 | for (auto it = ordered_waiters.begin(); | |
371 | it != ordered_waiters.end(); | |
372 | ++it) { | |
373 | ls.push_back(it->second); | |
374 | } | |
375 | if (waiting.empty()) | |
376 | put(PIN_WAITER); | |
377 | } | |
378 | void finish_waiting(uint64_t mask, int result = 0); | |
379 | ||
380 | // --------------------------------------------- | |
381 | // locking | |
382 | // noop unless overloaded. | |
383 | virtual SimpleLock* get_lock(int type) { ceph_abort(); return 0; } | |
384 | virtual void set_object_info(MDSCacheObjectInfo &info) { ceph_abort(); } | |
385 | virtual void encode_lock_state(int type, bufferlist& bl) { ceph_abort(); } | |
386 | virtual void decode_lock_state(int type, bufferlist& bl) { ceph_abort(); } | |
387 | virtual void finish_lock_waiters(int type, uint64_t mask, int r=0) { ceph_abort(); } | |
388 | virtual void add_lock_waiter(int type, uint64_t mask, MDSInternalContextBase *c) { ceph_abort(); } | |
389 | virtual bool is_lock_waiting(int type, uint64_t mask) { ceph_abort(); return false; } | |
390 | ||
391 | virtual void clear_dirty_scattered(int type) { ceph_abort(); } | |
392 | ||
393 | // --------------------------------------------- | |
394 | // ordering | |
395 | virtual bool is_lt(const MDSCacheObject *r) const = 0; | |
396 | struct ptr_lt { | |
397 | bool operator()(const MDSCacheObject* l, const MDSCacheObject* r) const { | |
398 | return l->is_lt(r); | |
399 | } | |
400 | }; | |
401 | ||
402 | }; | |
403 | ||
404 | inline std::ostream& operator<<(std::ostream& out, MDSCacheObject &o) { | |
405 | o.print(out); | |
406 | return out; | |
407 | } | |
408 | ||
31f18b77 | 409 | inline std::ostream& operator<<(std::ostream& out, const mdsco_db_line_prefix& o) { |
7c673cae FG |
410 | o.object->print_db_line_prefix(out); |
411 | return out; | |
412 | } | |
413 | ||
414 | #endif |