// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

#ifndef ROCKSDB_LITE
#ifndef OS_WIN

#include "utilities/transactions/lock/range/range_tree/range_tree_lock_tracker.h"

#include "utilities/transactions/lock/range/range_tree/range_tree_lock_manager.h"

namespace ROCKSDB_NAMESPACE {

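// Return the list of tracked ranges, creating it lazily on first use.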
RangeLockList *RangeTreeLockTracker::getOrCreateList() {
  if (range_list_) return range_list_.get();

  // Doesn't exist, create
  range_list_.reset(new RangeLockList());
  return range_list_.get();
}

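// Track a point lock: the key is serialized as an endpoint and recorded as a
// single-point range [key, key] in the per-column-family range buffer.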
void RangeTreeLockTracker::Track(const PointLockRequest &lock_req) {
  DBT key_dbt;
  std::string key;
  serialize_endpoint(Endpoint(lock_req.key, false), &key);
  toku_fill_dbt(&key_dbt, key.data(), key.size());
  RangeLockList *rl = getOrCreateList();
  rl->Append(lock_req.column_family_id, &key_dbt, &key_dbt);
}

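// Track a range lock: serialize both endpoints and record the [start, end]
// range for the requesting column family.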
void RangeTreeLockTracker::Track(const RangeLockRequest &lock_req) {
  DBT start_dbt, end_dbt;
  std::string start_key, end_key;

  serialize_endpoint(lock_req.start_endp, &start_key);
  serialize_endpoint(lock_req.end_endp, &end_key);

  toku_fill_dbt(&start_dbt, start_key.data(), start_key.size());
  toku_fill_dbt(&end_dbt, end_key.data(), end_key.size());

  RangeLockList *rl = getOrCreateList();
  rl->Append(lock_req.column_family_id, &start_dbt, &end_dbt);
}

PointLockStatus RangeTreeLockTracker::GetPointLockStatus(
    ColumnFamilyId /*cf_id*/, const std::string & /*key*/) const {
  // This function is not expected to be called, as
  // RangeTreeLockTracker::IsPointLockSupported() returns false. Return a
  // status indicating the point is not locked.
  PointLockStatus p;
  p.locked = false;
  p.exclusive = true;
  p.seq = 0;
  return p;
}

void RangeTreeLockTracker::Clear() { range_list_.reset(); }

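// Record a locked range for the given column family, creating the column
// family's range buffer on first use.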
void RangeLockList::Append(ColumnFamilyId cf_id, const DBT *left_key,
                           const DBT *right_key) {
  MutexLock l(&mutex_);
  // Only the transaction owner thread calls this function.
  // The same thread does the lock release, so we can be certain nobody is
  // releasing the locks concurrently.
  assert(!releasing_locks_.load());
  auto it = buffers_.find(cf_id);
  if (it == buffers_.end()) {
    // create a new one
    it = buffers_.emplace(cf_id, std::make_shared<toku::range_buffer>()).first;
    it->second->create();
  }
  it->second->append(left_key, right_key);
}

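// Release the locks tracked in this list from their locktrees, then retry
// pending lock requests that may now be able to proceed.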
void RangeLockList::ReleaseLocks(RangeTreeLockManager *mgr,
                                 PessimisticTransaction *txn,
                                 bool all_trx_locks) {
  {
    MutexLock l(&mutex_);
    // The lt->release_locks() call below will walk range_list->buffer_. We
    // need to prevent the lock escalation callback from replacing
    // range_list->buffer_ while we are doing that.
    //
    // An additional complication here is the internal mutex(es) in the
    // locktree (let's call them latches):
    // - Lock escalation first obtains latches on the lock tree.
    // - Then, it calls RangeTreeLockManager::on_escalate to replace the
    //   transaction's range_list->buffer_. Access to that buffer must be
    //   synchronized, so it will want to acquire the range_list->mutex_.
    //
    // In this function we want to do the reverse:
    // - Acquire range_list->mutex_ to prevent access to the range_list.
    // - Then, the lt->release_locks() call will walk through the range_list
    //   and acquire latches on parts of the lock tree to remove locks from
    //   it.
    //
    // How do we avoid the deadlock? The idea is that here we set
    // releasing_locks_=true and release the mutex.
    // All other users of the range_list must:
    // - Acquire the mutex, then check that releasing_locks_=false.
    //   (The code in this function doesn't do that, as there's only one
    //    thread that releases the transaction's locks.)
    releasing_locks_.store(true);
  }

  for (auto it : buffers_) {
    // Don't try to call release_locks() if the buffer is empty! If we are
    // not holding any locks, the lock tree might be in the STO-mode with
    // another transaction, and our attempt to release an empty set of locks
    // will cause an assertion failure.
    if (it.second->get_num_ranges()) {
      auto lt_ptr = mgr->GetLockTreeForCF(it.first);
      toku::locktree *lt = lt_ptr.get();

      lt->release_locks((TXNID)txn, it.second.get(), all_trx_locks);

      it.second->destroy();
      it.second->create();

      toku::lock_request::retry_all_lock_requests(lt,
                                                  wait_callback_for_locktree);
    }
  }

  Clear();
  releasing_locks_.store(false);
}

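// Replace the tracked ranges for one locktree with a new set of ranges. This
// is used by the lock escalation path (see RangeTreeLockManager::on_escalate
// mentioned above) to install the escalated, coarser ranges.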
void RangeLockList::ReplaceLocks(const toku::locktree *lt,
                                 const toku::range_buffer &buffer) {
  MutexLock l(&mutex_);
  if (releasing_locks_.load()) {
    // Do nothing. The transaction is releasing its locks, so it will not care
    // about having a correct list of ranges. (In TokuDB,
    // toku_db_txn_escalate_callback() makes use of this property, too)
    return;
  }

  ColumnFamilyId cf_id = (ColumnFamilyId)lt->get_dict_id().dictid;

  auto it = buffers_.find(cf_id);
  it->second->destroy();
  it->second->create();

  toku::range_buffer::iterator iter(&buffer);
  toku::range_buffer::iterator::record rec;
  while (iter.current(&rec)) {
    it->second->append(rec.get_left_key(), rec.get_right_key());
    iter.next();
  }
}

}  // namespace ROCKSDB_NAMESPACE
#endif  // OS_WIN
#endif  // ROCKSDB_LITE