]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | #ifndef CEPH_MDSCACHEOBJECT_H |
2 | #define CEPH_MDSCACHEOBJECT_H | |
3 | ||
7c673cae | 4 | #include <ostream> |
11fdf7f2 | 5 | #include <string_view> |
7c673cae FG |
6 | |
7 | #include "common/config.h" | |
181888fb FG |
8 | |
9 | #include "include/Context.h" | |
11fdf7f2 | 10 | #include "include/ceph_assert.h" |
181888fb | 11 | #include "include/mempool.h" |
7c673cae FG |
12 | #include "include/types.h" |
13 | #include "include/xlist.h" | |
181888fb | 14 | |
7c673cae | 15 | #include "mdstypes.h" |
11fdf7f2 | 16 | #include "MDSContext.h" |
f67539c2 | 17 | #include "include/elist.h" |
7c673cae FG |
18 | |
19 | #define MDS_REF_SET // define me for improved debug output, sanity checking | |
20 | //#define MDS_AUTHPIN_SET // define me for debugging auth pin leaks | |
21 | //#define MDS_VERIFY_FRAGSTAT // do (slow) sanity checking on frags | |
22 | ||
9f95a23c TL |
23 | /* |
24 | * for metadata leases to clients | |
25 | */ | |
7c673cae FG |
26 | class MLock; |
27 | class SimpleLock; | |
28 | class MDSCacheObject; | |
11fdf7f2 | 29 | class MDSContext; |
7c673cae | 30 | |
f67539c2 TL |
31 | namespace ceph { |
32 | class Formatter; | |
33 | } | |
34 | ||
7c673cae | 35 | struct ClientLease { |
9f95a23c TL |
36 | ClientLease(client_t c, MDSCacheObject *p) : |
37 | client(c), parent(p), | |
38 | item_session_lease(this), | |
39 | item_lease(this) { } | |
40 | ClientLease() = delete; | |
41 | ||
7c673cae FG |
42 | client_t client; |
43 | MDSCacheObject *parent; | |
44 | ||
94b18763 | 45 | ceph_seq_t seq = 0; |
7c673cae FG |
46 | utime_t ttl; |
47 | xlist<ClientLease*>::item item_session_lease; // per-session list | |
48 | xlist<ClientLease*>::item item_lease; // global list | |
7c673cae FG |
49 | }; |
50 | ||
7c673cae FG |
51 | // print hack |
52 | struct mdsco_db_line_prefix { | |
7c673cae | 53 | explicit mdsco_db_line_prefix(MDSCacheObject *o) : object(o) {} |
9f95a23c | 54 | MDSCacheObject *object; |
7c673cae | 55 | }; |
7c673cae FG |
56 | |
57 | class MDSCacheObject { | |
58 | public: | |
9f95a23c TL |
59 | typedef mempool::mds_co::compact_map<mds_rank_t,unsigned> replica_map_type; |
60 | ||
61 | struct ptr_lt { | |
62 | bool operator()(const MDSCacheObject* l, const MDSCacheObject* r) const { | |
63 | return l->is_lt(r); | |
64 | } | |
65 | }; | |
66 | ||
7c673cae FG |
67 | // -- pins -- |
68 | const static int PIN_REPLICATED = 1000; | |
69 | const static int PIN_DIRTY = 1001; | |
70 | const static int PIN_LOCK = -1002; | |
71 | const static int PIN_REQUEST = -1003; | |
72 | const static int PIN_WAITER = 1004; | |
73 | const static int PIN_DIRTYSCATTERED = -1005; | |
74 | static const int PIN_AUTHPIN = 1006; | |
75 | static const int PIN_PTRWAITER = -1007; | |
76 | const static int PIN_TEMPEXPORTING = 1008; // temp pin between encode_ and finish_export | |
77 | static const int PIN_CLIENTLEASE = 1009; | |
78 | static const int PIN_DISCOVERBASE = 1010; | |
f67539c2 | 79 | static const int PIN_SCRUBQUEUE = 1011; // for scrub of inode and dir |
7c673cae | 80 | |
7c673cae FG |
81 | // -- state -- |
82 | const static int STATE_AUTH = (1<<30); | |
83 | const static int STATE_DIRTY = (1<<29); | |
84 | const static int STATE_NOTIFYREF = (1<<28); // notify dropping ref drop through _put() | |
85 | const static int STATE_REJOINING = (1<<27); // replica has not joined w/ primary copy | |
86 | const static int STATE_REJOINUNDEF = (1<<26); // contents undefined. | |
87 | ||
7c673cae FG |
88 | // -- wait -- |
89 | const static uint64_t WAIT_ORDERED = (1ull<<61); | |
90 | const static uint64_t WAIT_SINGLEAUTH = (1ull<<60); | |
91 | const static uint64_t WAIT_UNFREEZE = (1ull<<59); // pka AUTHPINNABLE | |
92 | ||
f67539c2 TL |
93 | elist<MDSCacheObject*>::item item_scrub; // for scrub inode or dir |
94 | ||
94b18763 | 95 | MDSCacheObject() {} |
7c673cae FG |
96 | virtual ~MDSCacheObject() {} |
97 | ||
9f95a23c TL |
98 | std::string_view generic_pin_name(int p) const; |
99 | ||
7c673cae | 100 | // printing |
aee94f69 TL |
101 | virtual void print(std::ostream& out) const = 0; |
102 | virtual std::ostream& print_db_line_prefix(std::ostream& out) const { | |
7c673cae FG |
103 | return out << "mdscacheobject(" << this << ") "; |
104 | } | |
7c673cae | 105 | |
7c673cae FG |
106 | unsigned get_state() const { return state; } |
107 | unsigned state_test(unsigned mask) const { return (state & mask); } | |
108 | void state_clear(unsigned mask) { state &= ~mask; } | |
109 | void state_set(unsigned mask) { state |= mask; } | |
110 | void state_reset(unsigned s) { state = s; } | |
111 | ||
112 | bool is_auth() const { return state_test(STATE_AUTH); } | |
113 | bool is_dirty() const { return state_test(STATE_DIRTY); } | |
114 | bool is_clean() const { return !is_dirty(); } | |
115 | bool is_rejoining() const { return state_test(STATE_REJOINING); } | |
116 | ||
117 | // -------------------------------------------- | |
118 | // authority | |
119 | virtual mds_authority_t authority() const = 0; | |
f67539c2 | 120 | virtual bool is_ambiguous_auth() const { |
7c673cae FG |
121 | return authority().second != CDIR_AUTH_UNKNOWN; |
122 | } | |
123 | ||
7c673cae FG |
124 | int get_num_ref(int by = -1) const { |
125 | #ifdef MDS_REF_SET | |
126 | if (by >= 0) { | |
127 | if (ref_map.find(by) == ref_map.end()) { | |
128 | return 0; | |
129 | } else { | |
130 | return ref_map.find(by)->second; | |
131 | } | |
132 | } | |
133 | #endif | |
134 | return ref; | |
135 | } | |
11fdf7f2 | 136 | virtual std::string_view pin_name(int by) const = 0; |
7c673cae FG |
137 | //bool is_pinned_by(int by) { return ref_set.count(by); } |
138 | //multiset<int>& get_ref_set() { return ref_set; } | |
139 | ||
140 | virtual void last_put() {} | |
141 | virtual void bad_put(int by) { | |
142 | #ifdef MDS_REF_SET | |
11fdf7f2 | 143 | ceph_assert(ref_map[by] > 0); |
7c673cae | 144 | #endif |
11fdf7f2 | 145 | ceph_assert(ref > 0); |
7c673cae FG |
146 | } |
147 | virtual void _put() {} | |
148 | void put(int by) { | |
149 | #ifdef MDS_REF_SET | |
150 | if (ref == 0 || ref_map[by] == 0) { | |
151 | #else | |
152 | if (ref == 0) { | |
153 | #endif | |
154 | bad_put(by); | |
155 | } else { | |
156 | ref--; | |
157 | #ifdef MDS_REF_SET | |
158 | ref_map[by]--; | |
159 | #endif | |
160 | if (ref == 0) | |
161 | last_put(); | |
162 | if (state_test(STATE_NOTIFYREF)) | |
163 | _put(); | |
164 | } | |
165 | } | |
166 | ||
167 | virtual void first_get() {} | |
168 | virtual void bad_get(int by) { | |
169 | #ifdef MDS_REF_SET | |
11fdf7f2 | 170 | ceph_assert(by < 0 || ref_map[by] == 0); |
7c673cae FG |
171 | #endif |
172 | ceph_abort(); | |
173 | } | |
174 | void get(int by) { | |
175 | if (ref == 0) | |
176 | first_get(); | |
177 | ref++; | |
178 | #ifdef MDS_REF_SET | |
179 | if (ref_map.find(by) == ref_map.end()) | |
180 | ref_map[by] = 0; | |
181 | ref_map[by]++; | |
182 | #endif | |
183 | } | |
184 | ||
185 | void print_pin_set(std::ostream& out) const { | |
186 | #ifdef MDS_REF_SET | |
11fdf7f2 TL |
187 | for(auto const &p : ref_map) { |
188 | out << " " << pin_name(p.first) << "=" << p.second; | |
7c673cae FG |
189 | } |
190 | #else | |
191 | out << " nref=" << ref; | |
192 | #endif | |
193 | } | |
194 | ||
7c673cae | 195 | int get_num_auth_pins() const { return auth_pins; } |
11fdf7f2 TL |
196 | #ifdef MDS_AUTHPIN_SET |
197 | void print_authpin_set(std::ostream& out) const { | |
198 | out << " (" << auth_pin_set << ")"; | |
199 | } | |
200 | #endif | |
7c673cae | 201 | |
f67539c2 TL |
202 | void dump_states(ceph::Formatter *f) const; |
203 | void dump(ceph::Formatter *f) const; | |
7c673cae | 204 | |
7c673cae | 205 | // auth pins |
91327a77 AA |
206 | enum { |
207 | // can_auth_pin() error codes | |
208 | ERR_NOT_AUTH = 1, | |
209 | ERR_EXPORTING_TREE, | |
210 | ERR_FRAGMENTING_DIR, | |
211 | ERR_EXPORTING_INODE, | |
212 | }; | |
213 | virtual bool can_auth_pin(int *err_code=nullptr) const = 0; | |
7c673cae FG |
214 | virtual void auth_pin(void *who) = 0; |
215 | virtual void auth_unpin(void *who) = 0; | |
216 | virtual bool is_frozen() const = 0; | |
217 | virtual bool is_freezing() const = 0; | |
218 | virtual bool is_freezing_or_frozen() const { | |
219 | return is_frozen() || is_freezing(); | |
220 | } | |
221 | ||
181888fb FG |
222 | bool is_replicated() const { return !get_replicas().empty(); } |
223 | bool is_replica(mds_rank_t mds) const { return get_replicas().count(mds); } | |
224 | int num_replicas() const { return get_replicas().size(); } | |
7c673cae | 225 | unsigned add_replica(mds_rank_t mds) { |
181888fb FG |
226 | if (get_replicas().count(mds)) |
227 | return ++get_replicas()[mds]; // inc nonce | |
228 | if (get_replicas().empty()) | |
7c673cae | 229 | get(PIN_REPLICATED); |
181888fb | 230 | return get_replicas()[mds] = 1; |
7c673cae FG |
231 | } |
232 | void add_replica(mds_rank_t mds, unsigned nonce) { | |
181888fb | 233 | if (get_replicas().empty()) |
7c673cae | 234 | get(PIN_REPLICATED); |
181888fb | 235 | get_replicas()[mds] = nonce; |
7c673cae FG |
236 | } |
237 | unsigned get_replica_nonce(mds_rank_t mds) { | |
11fdf7f2 | 238 | ceph_assert(get_replicas().count(mds)); |
181888fb | 239 | return get_replicas()[mds]; |
7c673cae FG |
240 | } |
241 | void remove_replica(mds_rank_t mds) { | |
11fdf7f2 | 242 | ceph_assert(get_replicas().count(mds)); |
181888fb FG |
243 | get_replicas().erase(mds); |
244 | if (get_replicas().empty()) { | |
7c673cae | 245 | put(PIN_REPLICATED); |
181888fb | 246 | } |
7c673cae FG |
247 | } |
248 | void clear_replica_map() { | |
181888fb | 249 | if (!get_replicas().empty()) |
7c673cae FG |
250 | put(PIN_REPLICATED); |
251 | replica_map.clear(); | |
252 | } | |
181888fb FG |
253 | replica_map_type& get_replicas() { return replica_map; } |
254 | const replica_map_type& get_replicas() const { return replica_map; } | |
7c673cae | 255 | void list_replicas(std::set<mds_rank_t>& ls) const { |
181888fb FG |
256 | for (const auto &p : get_replicas()) { |
257 | ls.insert(p.first); | |
258 | } | |
7c673cae FG |
259 | } |
260 | ||
261 | unsigned get_replica_nonce() const { return replica_nonce; } | |
262 | void set_replica_nonce(unsigned n) { replica_nonce = n; } | |
263 | ||
9f95a23c | 264 | bool is_waiter_for(uint64_t mask, uint64_t min=0); |
11fdf7f2 TL |
265 | virtual void add_waiter(uint64_t mask, MDSContext *c) { |
266 | if (waiting.empty()) | |
7c673cae FG |
267 | get(PIN_WAITER); |
268 | ||
269 | uint64_t seq = 0; | |
270 | if (mask & WAIT_ORDERED) { | |
271 | seq = ++last_wait_seq; | |
272 | mask &= ~WAIT_ORDERED; | |
273 | } | |
f67539c2 | 274 | waiting.insert(std::pair<uint64_t, std::pair<uint64_t, MDSContext*> >( |
7c673cae | 275 | mask, |
f67539c2 | 276 | std::pair<uint64_t, MDSContext*>(seq, c))); |
11fdf7f2 | 277 | // pdout(10,g_conf()->debug_mds) << (mdsco_db_line_prefix(this)) |
7c673cae FG |
278 | // << "add_waiter " << hex << mask << dec << " " << c |
279 | // << " on " << *this | |
280 | // << dendl; | |
281 | ||
282 | } | |
9f95a23c | 283 | virtual void take_waiting(uint64_t mask, MDSContext::vec& ls); |
7c673cae FG |
284 | void finish_waiting(uint64_t mask, int result = 0); |
285 | ||
286 | // --------------------------------------------- | |
287 | // locking | |
288 | // noop unless overloaded. | |
289 | virtual SimpleLock* get_lock(int type) { ceph_abort(); return 0; } | |
290 | virtual void set_object_info(MDSCacheObjectInfo &info) { ceph_abort(); } | |
f67539c2 TL |
291 | virtual void encode_lock_state(int type, ceph::buffer::list& bl) { ceph_abort(); } |
292 | virtual void decode_lock_state(int type, const ceph::buffer::list& bl) { ceph_abort(); } | |
7c673cae | 293 | virtual void finish_lock_waiters(int type, uint64_t mask, int r=0) { ceph_abort(); } |
11fdf7f2 | 294 | virtual void add_lock_waiter(int type, uint64_t mask, MDSContext *c) { ceph_abort(); } |
7c673cae FG |
295 | virtual bool is_lock_waiting(int type, uint64_t mask) { ceph_abort(); return false; } |
296 | ||
297 | virtual void clear_dirty_scattered(int type) { ceph_abort(); } | |
298 | ||
299 | // --------------------------------------------- | |
300 | // ordering | |
301 | virtual bool is_lt(const MDSCacheObject *r) const = 0; | |
7c673cae | 302 | |
9f95a23c TL |
303 | // state |
304 | protected: | |
305 | __u32 state = 0; // state bits | |
306 | ||
307 | // pins | |
308 | __s32 ref = 0; // reference count | |
309 | #ifdef MDS_REF_SET | |
310 | mempool::mds_co::flat_map<int,int> ref_map; | |
311 | #endif | |
312 | ||
313 | int auth_pins = 0; | |
314 | #ifdef MDS_AUTHPIN_SET | |
315 | mempool::mds_co::multiset<void*> auth_pin_set; | |
316 | #endif | |
317 | ||
318 | // replication (across mds cluster) | |
319 | unsigned replica_nonce = 0; // [replica] defined on replica | |
320 | replica_map_type replica_map; // [auth] mds -> nonce | |
321 | ||
322 | // --------------------------------------------- | |
323 | // waiting | |
324 | private: | |
325 | mempool::mds_co::compact_multimap<uint64_t, std::pair<uint64_t, MDSContext*>> waiting; | |
326 | static uint64_t last_wait_seq; | |
7c673cae FG |
327 | }; |
328 | ||
aee94f69 | 329 | inline std::ostream& operator<<(std::ostream& out, const MDSCacheObject& o) { |
7c673cae FG |
330 | o.print(out); |
331 | return out; | |
332 | } | |
333 | ||
31f18b77 | 334 | inline std::ostream& operator<<(std::ostream& out, const mdsco_db_line_prefix& o) { |
7c673cae FG |
335 | o.object->print_db_line_prefix(out); |
336 | return out; | |
337 | } | |
7c673cae | 338 | #endif |