ceph/src/osd/osd_internal_types.h
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#ifndef CEPH_OSD_INTERNAL_TYPES_H
#define CEPH_OSD_INTERNAL_TYPES_H

#include "osd_types.h"
#include "OpRequest.h"

/*
 * keep tabs on object modifications that are in flight.
 * we need to know the projected existence, size, snapset,
 * etc., because we don't send writes down to disk until after
 * replicas ack.
 */

struct SnapSetContext {
  hobject_t oid;
  SnapSet snapset;
  int ref;
  bool registered : 1;
  bool exists : 1;

  explicit SnapSetContext(const hobject_t& o) :
    oid(o), ref(0), registered(false), exists(true) { }
};
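
/*
 * A sketch of how a caller might manage SnapSetContext refs.  The
 * registry map and lookup_or_create() below are hypothetical (the real
 * refcounting and registration live in the PG code), but they
 * illustrate what ref and registered are for:
 *
 *   SnapSetContext *lookup_or_create(
 *     map<hobject_t, SnapSetContext*>& registry,  // hypothetical registry
 *     const hobject_t& oid) {
 *     auto p = registry.find(oid);
 *     SnapSetContext *ssc;
 *     if (p == registry.end()) {
 *       ssc = new SnapSetContext(oid);
 *       registry[oid] = ssc;
 *       ssc->registered = true;   // tracked by the registry
 *     } else {
 *       ssc = p->second;
 *     }
 *     ++ssc->ref;                 // caller now holds one reference
 *     return ssc;
 *   }
 */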

struct ObjectContext;

struct ObjectState {
  object_info_t oi;
  bool exists;  ///< the stored object exists (i.e., we will remember the object_info_t)

  ObjectState() : exists(false) {}

  ObjectState(const object_info_t &oi_, bool exists_)
    : oi(oi_), exists(exists_) {}
};

typedef ceph::shared_ptr<ObjectContext> ObjectContextRef;
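
/*
 * The ObjectState inside an ObjectContext is the *projected* state: a
 * write updates it when the op is prepared, not when it commits, so
 * later ops see the in-flight size and existence.  A hypothetical
 * sketch (the real update paths live in PrimaryLogPG):
 *
 *   void prepare_write(ObjectContextRef obc, uint64_t new_size) {
 *     obc->obs.exists = true;       // object will exist once this commits
 *     obc->obs.oi.size = new_size;  // projected size, visible immediately
 *     // ... queue the transaction; on-disk state catches up after
 *     // replicas ack
 *   }
 */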

struct ObjectContext {
  ObjectState obs;

  SnapSetContext *ssc;  // may be null

  Context *destructor_callback;

private:
  Mutex lock;
public:
  Cond cond;
  int unstable_writes, readers, writers_waiting, readers_waiting;

  // any entity in obs.oi.watchers MUST have an entry in watchers.
  map<pair<uint64_t, entity_name_t>, WatchRef> watchers;

  // attr cache
  map<string, bufferlist> attr_cache;

  struct RWState {
    enum State {
      RWNONE,
      RWREAD,
      RWWRITE,
      RWEXCL,
    };
    static const char *get_state_name(State s) {
      switch (s) {
      case RWNONE: return "none";
      case RWREAD: return "read";
      case RWWRITE: return "write";
      case RWEXCL: return "excl";
      default: return "???";
      }
    }
    const char *get_state_name() const {
      return get_state_name(state);
    }

    list<OpRequestRef> waiters;  ///< ops waiting on state change
    int count;                   ///< number of readers or writers

    State state:4;               ///< rw state
    /// if set, restart backfill when we can get a read lock
    bool recovery_read_marker:1;
    /// if set, requeue snaptrim on lock release
    bool snaptrimmer_write_marker:1;

    RWState()
      : count(0),
        state(RWNONE),
        recovery_read_marker(false),
        snaptrimmer_write_marker(false)
    {}
    bool get_read(OpRequestRef op) {
      if (get_read_lock()) {
        return true;
      } // else
      waiters.push_back(op);
      return false;
    }
    /// try to take a read lock; on success, adjust count (and state if needed)
    bool get_read_lock() {
      // don't starve anybody!
      if (!waiters.empty()) {
        return false;
      }
      switch (state) {
      case RWNONE:
        assert(count == 0);
        state = RWREAD;
        // fall through
      case RWREAD:
        count++;
        return true;
      case RWWRITE:
        return false;
      case RWEXCL:
        return false;
      default:
        assert(0 == "unhandled case");
        return false;
      }
    }

    bool get_write(OpRequestRef op, bool greedy=false) {
      if (get_write_lock(greedy)) {
        return true;
      } // else
      if (op)
        waiters.push_back(op);
      return false;
    }
    bool get_write_lock(bool greedy=false) {
      if (!greedy) {
        // don't starve anybody!
        if (!waiters.empty() ||
            recovery_read_marker) {
          return false;
        }
      }
      switch (state) {
      case RWNONE:
        assert(count == 0);
        state = RWWRITE;
        // fall through
      case RWWRITE:
        count++;
        return true;
      case RWREAD:
        return false;
      case RWEXCL:
        return false;
      default:
        assert(0 == "unhandled case");
        return false;
      }
    }
    bool get_excl_lock() {
      switch (state) {
      case RWNONE:
        assert(count == 0);
        state = RWEXCL;
        count = 1;
        return true;
      case RWWRITE:
        return false;
      case RWREAD:
        return false;
      case RWEXCL:
        return false;
      default:
        assert(0 == "unhandled case");
        return false;
      }
    }
    bool get_excl(OpRequestRef op) {
      if (get_excl_lock()) {
        return true;
      } // else
      if (op)
        waiters.push_back(op);
      return false;
    }
    /// same as get_write_lock, but ignore starvation
    bool take_write_lock() {
      if (state == RWWRITE) {
        count++;
        return true;
      }
      return get_write_lock();
    }
    void dec(list<OpRequestRef> *requeue) {
      assert(count > 0);
      assert(requeue);
      count--;
      if (count == 0) {
        state = RWNONE;
        requeue->splice(requeue->end(), waiters);
      }
    }
    void put_read(list<OpRequestRef> *requeue) {
      assert(state == RWREAD);
      dec(requeue);
    }
    void put_write(list<OpRequestRef> *requeue) {
      assert(state == RWWRITE);
      dec(requeue);
    }
    void put_excl(list<OpRequestRef> *requeue) {
      assert(state == RWEXCL);
      dec(requeue);
    }
    bool empty() const { return state == RWNONE; }
  } rwstate;
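
  /*
   * rwstate in a nutshell: readers share RWREAD, writers share RWWRITE,
   * RWEXCL admits one holder, and a non-empty waiters list turns away
   * new entrants so nobody starves.  For example, a plausible sequence
   * (op1..op4 are hypothetical OpRequestRefs):
   *
   *   list<OpRequestRef> requeue;
   *   rwstate.get_read(op1);       // RWNONE -> RWREAD, count == 1
   *   rwstate.get_read(op2);       // shared: count == 2
   *   rwstate.get_write(op3);      // fails; op3 parked on waiters
   *   rwstate.get_read(op4);       // fails too: waiters non-empty
   *   rwstate.put_read(&requeue);  // count == 1
   *   rwstate.put_read(&requeue);  // count == 0 -> RWNONE; op3 and op4
   *                                // spliced onto requeue
   */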

  bool get_read(OpRequestRef op) {
    return rwstate.get_read(op);
  }
  bool get_write(OpRequestRef op) {
    return rwstate.get_write(op, false);
  }
  bool get_excl(OpRequestRef op) {
    return rwstate.get_excl(op);
  }
  bool get_lock_type(OpRequestRef op, RWState::State type) {
    switch (type) {
    case RWState::RWWRITE:
      return get_write(op);
    case RWState::RWREAD:
      return get_read(op);
    case RWState::RWEXCL:
      return get_excl(op);
    default:
      assert(0 == "invalid lock type");
      return true;
    }
  }
  bool get_write_greedy(OpRequestRef op) {
    return rwstate.get_write(op, true);
  }
  bool get_snaptrimmer_write(bool mark_if_unsuccessful) {
    if (rwstate.get_write_lock()) {
      return true;
    } else {
      if (mark_if_unsuccessful)
        rwstate.snaptrimmer_write_marker = true;
      return false;
    }
  }
  bool get_recovery_read() {
    rwstate.recovery_read_marker = true;
    if (rwstate.get_read_lock()) {
      return true;
    }
    return false;
  }
  bool try_get_read_lock() {
    return rwstate.get_read_lock();
  }
  void drop_recovery_read(list<OpRequestRef> *ls) {
    assert(rwstate.recovery_read_marker);
    rwstate.put_read(ls);
    rwstate.recovery_read_marker = false;
  }
  void put_lock_type(
    ObjectContext::RWState::State type,
    list<OpRequestRef> *to_wake,
    bool *requeue_recovery,
    bool *requeue_snaptrimmer) {
    switch (type) {
    case ObjectContext::RWState::RWWRITE:
      rwstate.put_write(to_wake);
      break;
    case ObjectContext::RWState::RWREAD:
      rwstate.put_read(to_wake);
      break;
    case ObjectContext::RWState::RWEXCL:
      rwstate.put_excl(to_wake);
      break;
    default:
      assert(0 == "invalid lock type");
    }
    if (rwstate.empty() && rwstate.recovery_read_marker) {
      rwstate.recovery_read_marker = false;
      *requeue_recovery = true;
    }
    if (rwstate.empty() && rwstate.snaptrimmer_write_marker) {
      rwstate.snaptrimmer_write_marker = false;
      *requeue_snaptrimmer = true;
    }
  }
  bool is_request_pending() {
    return (rwstate.count > 0);
  }

  ObjectContext()
    : ssc(NULL),
      destructor_callback(0),
      lock("PrimaryLogPG::ObjectContext::lock"),
      unstable_writes(0), readers(0), writers_waiting(0), readers_waiting(0),
      blocked(false), requeue_scrub_on_unblock(false) {}

  ~ObjectContext() {
    assert(rwstate.empty());
    if (destructor_callback)
      destructor_callback->complete(0);
  }

  void start_block() {
    assert(!blocked);
    blocked = true;
  }
  void stop_block() {
    assert(blocked);
    blocked = false;
  }
  bool is_blocked() const {
    return blocked;
  }

  // do simple synchronous mutual exclusion, for now.  no waitqueues or anything fancy.
  void ondisk_write_lock() {
    lock.Lock();
    writers_waiting++;
    while (readers_waiting || readers)
      cond.Wait(lock);
    writers_waiting--;
    unstable_writes++;
    lock.Unlock();
  }
  void ondisk_write_unlock() {
    lock.Lock();
    assert(unstable_writes > 0);
    unstable_writes--;
    if (!unstable_writes && readers_waiting)
      cond.Signal();
    lock.Unlock();
  }
  void ondisk_read_lock() {
    lock.Lock();
    readers_waiting++;
    while (unstable_writes)
      cond.Wait(lock);
    readers_waiting--;
    readers++;
    lock.Unlock();
  }
  void ondisk_read_unlock() {
    lock.Lock();
    assert(readers > 0);
    readers--;
    if (!readers && writers_waiting)
      cond.Signal();
    lock.Unlock();
  }
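
  /*
   * Pairing sketch for the ondisk locks: a writer holds the write lock
   * while its data is unstable (queued but not yet committed), so
   * readers only ever see stable data.  Function names here are
   * illustrative, not part of this header:
   *
   *   void apply_write(ObjectContext *obc)  { obc->ondisk_write_lock(); }
   *   void on_commit(ObjectContext *obc)    { obc->ondisk_write_unlock(); }
   *   void read_stable(ObjectContext *obc) {
   *     obc->ondisk_read_lock();   // blocks while unstable_writes > 0
   *     // ... read from disk ...
   *     obc->ondisk_read_unlock();
   *   }
   */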

  /// set while an in-progress copyfrom blocks ops on this object
  bool blocked:1;
  bool requeue_scrub_on_unblock:1;  // true if we need to requeue scrub on unblock

};
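
/*
 * A sketch of typical write-path use of the rwstate wrappers.  The
 * requeueing of woken ops and the recovery/snaptrim kicks are really
 * the owning PG's job; do_write() below is hypothetical:
 *
 *   bool do_write(ObjectContextRef obc, OpRequestRef op) {
 *     if (!obc->get_write(op))
 *       return false;  // op is parked on rwstate.waiters for later requeue
 *     // ... perform the write ...
 *     list<OpRequestRef> to_wake;
 *     bool requeue_recovery = false, requeue_snaptrimmer = false;
 *     obc->put_lock_type(ObjectContext::RWState::RWWRITE, &to_wake,
 *                        &requeue_recovery, &requeue_snaptrimmer);
 *     // requeue to_wake; kick recovery/snaptrim if the flags were set
 *     return true;
 *   }
 */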

inline ostream& operator<<(ostream& out, const ObjectState& obs)
{
  out << obs.oi.soid;
  if (!obs.exists)
    out << "(dne)";
  return out;
}

inline ostream& operator<<(ostream& out, const ObjectContext::RWState& rw)
{
  return out << "rwstate(" << rw.get_state_name()
             << " n=" << rw.count
             << " w=" << rw.waiters.size()
             << ")";
}

inline ostream& operator<<(ostream& out, const ObjectContext& obc)
{
  return out << "obc(" << obc.obs << " " << obc.rwstate << ")";
}

class ObcLockManager {
  struct ObjectLockState {
    ObjectContextRef obc;
    ObjectContext::RWState::State type;
    ObjectLockState(
      ObjectContextRef obc,
      ObjectContext::RWState::State type)
      : obc(obc), type(type) {}
  };
  map<hobject_t, ObjectLockState> locks;
public:
  ObcLockManager() = default;
  ObcLockManager(ObcLockManager &&) = default;
  ObcLockManager(const ObcLockManager &) = delete;
  ObcLockManager &operator=(ObcLockManager &&) = default;
  bool empty() const {
    return locks.empty();
  }
  bool get_lock_type(
    ObjectContext::RWState::State type,
    const hobject_t &hoid,
    ObjectContextRef obc,
    OpRequestRef op) {
    assert(locks.find(hoid) == locks.end());
    if (obc->get_lock_type(op, type)) {
      locks.insert(make_pair(hoid, ObjectLockState(obc, type)));
      return true;
    } else {
      return false;
    }
  }
  /// Get write lock, ignore starvation
  bool take_write_lock(
    const hobject_t &hoid,
    ObjectContextRef obc) {
    assert(locks.find(hoid) == locks.end());
    if (obc->rwstate.take_write_lock()) {
      locks.insert(
        make_pair(
          hoid, ObjectLockState(obc, ObjectContext::RWState::RWWRITE)));
      return true;
    } else {
      return false;
    }
  }
  /// Get write lock for snap trim
  bool get_snaptrimmer_write(
    const hobject_t &hoid,
    ObjectContextRef obc,
    bool mark_if_unsuccessful) {
    assert(locks.find(hoid) == locks.end());
    if (obc->get_snaptrimmer_write(mark_if_unsuccessful)) {
      locks.insert(
        make_pair(
          hoid, ObjectLockState(obc, ObjectContext::RWState::RWWRITE)));
      return true;
    } else {
      return false;
    }
  }
  /// Get write lock greedy
  bool get_write_greedy(
    const hobject_t &hoid,
    ObjectContextRef obc,
    OpRequestRef op) {
    assert(locks.find(hoid) == locks.end());
    if (obc->get_write_greedy(op)) {
      locks.insert(
        make_pair(
          hoid, ObjectLockState(obc, ObjectContext::RWState::RWWRITE)));
      return true;
    } else {
      return false;
    }
  }

  /// try get read lock
  bool try_get_read_lock(
    const hobject_t &hoid,
    ObjectContextRef obc) {
    assert(locks.find(hoid) == locks.end());
    if (obc->try_get_read_lock()) {
      locks.insert(
        make_pair(
          hoid,
          ObjectLockState(obc, ObjectContext::RWState::RWREAD)));
      return true;
    } else {
      return false;
    }
  }

  void put_locks(
    list<pair<hobject_t, list<OpRequestRef> > > *to_requeue,
    bool *requeue_recovery,
    bool *requeue_snaptrimmer) {
    for (auto& p : locks) {
      list<OpRequestRef> _to_requeue;
      p.second.obc->put_lock_type(
        p.second.type,
        &_to_requeue,
        requeue_recovery,
        requeue_snaptrimmer);
      if (to_requeue) {
        to_requeue->push_back(
          make_pair(
            p.second.obc->obs.oi.soid,
            std::move(_to_requeue)));
      }
    }
    locks.clear();
  }
  ~ObcLockManager() {
    assert(locks.empty());
  }
};
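
/*
 * ObcLockManager usage sketch: take locks on one or more objects, do
 * the work, then release everything with put_locks().  Names below
 * (hoid, obc, op) are assumed to exist in the caller:
 *
 *   ObcLockManager manager;
 *   if (manager.get_lock_type(ObjectContext::RWState::RWWRITE,
 *                             hoid, obc, op)) {
 *     // ... do work under the lock ...
 *     list<pair<hobject_t, list<OpRequestRef> > > to_requeue;
 *     bool requeue_recovery = false, requeue_snaptrimmer = false;
 *     manager.put_locks(&to_requeue, &requeue_recovery,
 *                       &requeue_snaptrimmer);
 *     // requeue the waiters in to_requeue, preserving per-object order
 *   }
 *   // the destructor asserts locks.empty(), so put_locks() must run first
 */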


#endif // CEPH_OSD_INTERNAL_TYPES_H