]> git.proxmox.com Git - ceph.git/blame - ceph/src/mds/MDSCacheObject.h
bump version to 18.2.2-pve1
[ceph.git] / ceph / src / mds / MDSCacheObject.h
CommitLineData
7c673cae
FG
1#ifndef CEPH_MDSCACHEOBJECT_H
2#define CEPH_MDSCACHEOBJECT_H
3
7c673cae 4#include <ostream>
11fdf7f2 5#include <string_view>
7c673cae
FG
6
7#include "common/config.h"
181888fb
FG
8
9#include "include/Context.h"
11fdf7f2 10#include "include/ceph_assert.h"
181888fb 11#include "include/mempool.h"
7c673cae
FG
12#include "include/types.h"
13#include "include/xlist.h"
181888fb 14
7c673cae 15#include "mdstypes.h"
11fdf7f2 16#include "MDSContext.h"
f67539c2 17#include "include/elist.h"
7c673cae
FG
18
19#define MDS_REF_SET // define me for improved debug output, sanity checking
20//#define MDS_AUTHPIN_SET // define me for debugging auth pin leaks
21//#define MDS_VERIFY_FRAGSTAT // do (slow) sanity checking on frags
22
9f95a23c
TL
23/*
24 * for metadata leases to clients
25 */
7c673cae
FG
26class MLock;
27class SimpleLock;
28class MDSCacheObject;
11fdf7f2 29class MDSContext;
7c673cae 30
f67539c2
TL
31namespace ceph {
32class Formatter;
33}
34
7c673cae 35struct ClientLease {
9f95a23c
TL
36 ClientLease(client_t c, MDSCacheObject *p) :
37 client(c), parent(p),
38 item_session_lease(this),
39 item_lease(this) { }
40 ClientLease() = delete;
41
7c673cae
FG
42 client_t client;
43 MDSCacheObject *parent;
44
94b18763 45 ceph_seq_t seq = 0;
7c673cae
FG
46 utime_t ttl;
47 xlist<ClientLease*>::item item_session_lease; // per-session list
48 xlist<ClientLease*>::item item_lease; // global list
7c673cae
FG
49};
50
7c673cae
FG
51// print hack
52struct mdsco_db_line_prefix {
7c673cae 53 explicit mdsco_db_line_prefix(MDSCacheObject *o) : object(o) {}
9f95a23c 54 MDSCacheObject *object;
7c673cae 55};
7c673cae
FG
56
57class MDSCacheObject {
58 public:
9f95a23c
TL
59 typedef mempool::mds_co::compact_map<mds_rank_t,unsigned> replica_map_type;
60
61 struct ptr_lt {
62 bool operator()(const MDSCacheObject* l, const MDSCacheObject* r) const {
63 return l->is_lt(r);
64 }
65 };
66
7c673cae
FG
67 // -- pins --
68 const static int PIN_REPLICATED = 1000;
69 const static int PIN_DIRTY = 1001;
70 const static int PIN_LOCK = -1002;
71 const static int PIN_REQUEST = -1003;
72 const static int PIN_WAITER = 1004;
73 const static int PIN_DIRTYSCATTERED = -1005;
74 static const int PIN_AUTHPIN = 1006;
75 static const int PIN_PTRWAITER = -1007;
76 const static int PIN_TEMPEXPORTING = 1008; // temp pin between encode_ and finish_export
77 static const int PIN_CLIENTLEASE = 1009;
78 static const int PIN_DISCOVERBASE = 1010;
f67539c2 79 static const int PIN_SCRUBQUEUE = 1011; // for scrub of inode and dir
7c673cae 80
7c673cae
FG
81 // -- state --
82 const static int STATE_AUTH = (1<<30);
83 const static int STATE_DIRTY = (1<<29);
84 const static int STATE_NOTIFYREF = (1<<28); // notify dropping ref drop through _put()
85 const static int STATE_REJOINING = (1<<27); // replica has not joined w/ primary copy
86 const static int STATE_REJOINUNDEF = (1<<26); // contents undefined.
87
7c673cae
FG
88 // -- wait --
89 const static uint64_t WAIT_ORDERED = (1ull<<61);
90 const static uint64_t WAIT_SINGLEAUTH = (1ull<<60);
91 const static uint64_t WAIT_UNFREEZE = (1ull<<59); // pka AUTHPINNABLE
92
f67539c2
TL
93 elist<MDSCacheObject*>::item item_scrub; // for scrub inode or dir
94
94b18763 95 MDSCacheObject() {}
7c673cae
FG
96 virtual ~MDSCacheObject() {}
97
9f95a23c
TL
98 std::string_view generic_pin_name(int p) const;
99
7c673cae 100 // printing
aee94f69
TL
101 virtual void print(std::ostream& out) const = 0;
102 virtual std::ostream& print_db_line_prefix(std::ostream& out) const {
7c673cae
FG
103 return out << "mdscacheobject(" << this << ") ";
104 }
7c673cae 105
7c673cae
FG
106 unsigned get_state() const { return state; }
107 unsigned state_test(unsigned mask) const { return (state & mask); }
108 void state_clear(unsigned mask) { state &= ~mask; }
109 void state_set(unsigned mask) { state |= mask; }
110 void state_reset(unsigned s) { state = s; }
111
112 bool is_auth() const { return state_test(STATE_AUTH); }
113 bool is_dirty() const { return state_test(STATE_DIRTY); }
114 bool is_clean() const { return !is_dirty(); }
115 bool is_rejoining() const { return state_test(STATE_REJOINING); }
116
117 // --------------------------------------------
118 // authority
119 virtual mds_authority_t authority() const = 0;
f67539c2 120 virtual bool is_ambiguous_auth() const {
7c673cae
FG
121 return authority().second != CDIR_AUTH_UNKNOWN;
122 }
123
7c673cae
FG
124 int get_num_ref(int by = -1) const {
125#ifdef MDS_REF_SET
126 if (by >= 0) {
127 if (ref_map.find(by) == ref_map.end()) {
128 return 0;
129 } else {
130 return ref_map.find(by)->second;
131 }
132 }
133#endif
134 return ref;
135 }
11fdf7f2 136 virtual std::string_view pin_name(int by) const = 0;
7c673cae
FG
137 //bool is_pinned_by(int by) { return ref_set.count(by); }
138 //multiset<int>& get_ref_set() { return ref_set; }
139
140 virtual void last_put() {}
141 virtual void bad_put(int by) {
142#ifdef MDS_REF_SET
11fdf7f2 143 ceph_assert(ref_map[by] > 0);
7c673cae 144#endif
11fdf7f2 145 ceph_assert(ref > 0);
7c673cae
FG
146 }
147 virtual void _put() {}
148 void put(int by) {
149#ifdef MDS_REF_SET
150 if (ref == 0 || ref_map[by] == 0) {
151#else
152 if (ref == 0) {
153#endif
154 bad_put(by);
155 } else {
156 ref--;
157#ifdef MDS_REF_SET
158 ref_map[by]--;
159#endif
160 if (ref == 0)
161 last_put();
162 if (state_test(STATE_NOTIFYREF))
163 _put();
164 }
165 }
166
167 virtual void first_get() {}
168 virtual void bad_get(int by) {
169#ifdef MDS_REF_SET
11fdf7f2 170 ceph_assert(by < 0 || ref_map[by] == 0);
7c673cae
FG
171#endif
172 ceph_abort();
173 }
174 void get(int by) {
175 if (ref == 0)
176 first_get();
177 ref++;
178#ifdef MDS_REF_SET
179 if (ref_map.find(by) == ref_map.end())
180 ref_map[by] = 0;
181 ref_map[by]++;
182#endif
183 }
184
185 void print_pin_set(std::ostream& out) const {
186#ifdef MDS_REF_SET
11fdf7f2
TL
187 for(auto const &p : ref_map) {
188 out << " " << pin_name(p.first) << "=" << p.second;
7c673cae
FG
189 }
190#else
191 out << " nref=" << ref;
192#endif
193 }
194
7c673cae 195 int get_num_auth_pins() const { return auth_pins; }
11fdf7f2
TL
196#ifdef MDS_AUTHPIN_SET
197 void print_authpin_set(std::ostream& out) const {
198 out << " (" << auth_pin_set << ")";
199 }
200#endif
7c673cae 201
f67539c2
TL
202 void dump_states(ceph::Formatter *f) const;
203 void dump(ceph::Formatter *f) const;
7c673cae 204
7c673cae 205 // auth pins
91327a77
AA
206 enum {
207 // can_auth_pin() error codes
208 ERR_NOT_AUTH = 1,
209 ERR_EXPORTING_TREE,
210 ERR_FRAGMENTING_DIR,
211 ERR_EXPORTING_INODE,
212 };
213 virtual bool can_auth_pin(int *err_code=nullptr) const = 0;
7c673cae
FG
214 virtual void auth_pin(void *who) = 0;
215 virtual void auth_unpin(void *who) = 0;
216 virtual bool is_frozen() const = 0;
217 virtual bool is_freezing() const = 0;
218 virtual bool is_freezing_or_frozen() const {
219 return is_frozen() || is_freezing();
220 }
221
181888fb
FG
222 bool is_replicated() const { return !get_replicas().empty(); }
223 bool is_replica(mds_rank_t mds) const { return get_replicas().count(mds); }
224 int num_replicas() const { return get_replicas().size(); }
7c673cae 225 unsigned add_replica(mds_rank_t mds) {
181888fb
FG
226 if (get_replicas().count(mds))
227 return ++get_replicas()[mds]; // inc nonce
228 if (get_replicas().empty())
7c673cae 229 get(PIN_REPLICATED);
181888fb 230 return get_replicas()[mds] = 1;
7c673cae
FG
231 }
232 void add_replica(mds_rank_t mds, unsigned nonce) {
181888fb 233 if (get_replicas().empty())
7c673cae 234 get(PIN_REPLICATED);
181888fb 235 get_replicas()[mds] = nonce;
7c673cae
FG
236 }
237 unsigned get_replica_nonce(mds_rank_t mds) {
11fdf7f2 238 ceph_assert(get_replicas().count(mds));
181888fb 239 return get_replicas()[mds];
7c673cae
FG
240 }
241 void remove_replica(mds_rank_t mds) {
11fdf7f2 242 ceph_assert(get_replicas().count(mds));
181888fb
FG
243 get_replicas().erase(mds);
244 if (get_replicas().empty()) {
7c673cae 245 put(PIN_REPLICATED);
181888fb 246 }
7c673cae
FG
247 }
248 void clear_replica_map() {
181888fb 249 if (!get_replicas().empty())
7c673cae
FG
250 put(PIN_REPLICATED);
251 replica_map.clear();
252 }
181888fb
FG
253 replica_map_type& get_replicas() { return replica_map; }
254 const replica_map_type& get_replicas() const { return replica_map; }
7c673cae 255 void list_replicas(std::set<mds_rank_t>& ls) const {
181888fb
FG
256 for (const auto &p : get_replicas()) {
257 ls.insert(p.first);
258 }
7c673cae
FG
259 }
260
261 unsigned get_replica_nonce() const { return replica_nonce; }
262 void set_replica_nonce(unsigned n) { replica_nonce = n; }
263
9f95a23c 264 bool is_waiter_for(uint64_t mask, uint64_t min=0);
11fdf7f2
TL
265 virtual void add_waiter(uint64_t mask, MDSContext *c) {
266 if (waiting.empty())
7c673cae
FG
267 get(PIN_WAITER);
268
269 uint64_t seq = 0;
270 if (mask & WAIT_ORDERED) {
271 seq = ++last_wait_seq;
272 mask &= ~WAIT_ORDERED;
273 }
f67539c2 274 waiting.insert(std::pair<uint64_t, std::pair<uint64_t, MDSContext*> >(
7c673cae 275 mask,
f67539c2 276 std::pair<uint64_t, MDSContext*>(seq, c)));
11fdf7f2 277// pdout(10,g_conf()->debug_mds) << (mdsco_db_line_prefix(this))
7c673cae
FG
278// << "add_waiter " << hex << mask << dec << " " << c
279// << " on " << *this
280// << dendl;
281
282 }
9f95a23c 283 virtual void take_waiting(uint64_t mask, MDSContext::vec& ls);
7c673cae
FG
284 void finish_waiting(uint64_t mask, int result = 0);
285
286 // ---------------------------------------------
287 // locking
288 // noop unless overloaded.
289 virtual SimpleLock* get_lock(int type) { ceph_abort(); return 0; }
290 virtual void set_object_info(MDSCacheObjectInfo &info) { ceph_abort(); }
f67539c2
TL
291 virtual void encode_lock_state(int type, ceph::buffer::list& bl) { ceph_abort(); }
292 virtual void decode_lock_state(int type, const ceph::buffer::list& bl) { ceph_abort(); }
7c673cae 293 virtual void finish_lock_waiters(int type, uint64_t mask, int r=0) { ceph_abort(); }
11fdf7f2 294 virtual void add_lock_waiter(int type, uint64_t mask, MDSContext *c) { ceph_abort(); }
7c673cae
FG
295 virtual bool is_lock_waiting(int type, uint64_t mask) { ceph_abort(); return false; }
296
297 virtual void clear_dirty_scattered(int type) { ceph_abort(); }
298
299 // ---------------------------------------------
300 // ordering
301 virtual bool is_lt(const MDSCacheObject *r) const = 0;
7c673cae 302
9f95a23c
TL
303 // state
304 protected:
305 __u32 state = 0; // state bits
306
307 // pins
308 __s32 ref = 0; // reference count
309#ifdef MDS_REF_SET
310 mempool::mds_co::flat_map<int,int> ref_map;
311#endif
312
313 int auth_pins = 0;
314#ifdef MDS_AUTHPIN_SET
315 mempool::mds_co::multiset<void*> auth_pin_set;
316#endif
317
318 // replication (across mds cluster)
319 unsigned replica_nonce = 0; // [replica] defined on replica
320 replica_map_type replica_map; // [auth] mds -> nonce
321
322 // ---------------------------------------------
323 // waiting
324 private:
325 mempool::mds_co::compact_multimap<uint64_t, std::pair<uint64_t, MDSContext*>> waiting;
326 static uint64_t last_wait_seq;
7c673cae
FG
327};
328
aee94f69 329inline std::ostream& operator<<(std::ostream& out, const MDSCacheObject& o) {
7c673cae
FG
330 o.print(out);
331 return out;
332}
333
31f18b77 334inline std::ostream& operator<<(std::ostream& out, const mdsco_db_line_prefix& o) {
7c673cae
FG
335 o.object->print_db_line_prefix(out);
336 return out;
337}
7c673cae 338#endif