1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
#ifndef ROCKSDB_LITE

#include "utilities/transactions/lock/range/range_tree/range_tree_lock_tracker.h"

#include "utilities/transactions/lock/range/range_tree/range_tree_lock_manager.h"
13 namespace ROCKSDB_NAMESPACE
{
15 RangeLockList
*RangeTreeLockTracker::getOrCreateList() {
16 if (range_list_
) return range_list_
.get();
18 // Doesn't exist, create
19 range_list_
.reset(new RangeLockList());
20 return range_list_
.get();
23 void RangeTreeLockTracker::Track(const PointLockRequest
&lock_req
) {
26 serialize_endpoint(Endpoint(lock_req
.key
, false), &key
);
27 toku_fill_dbt(&key_dbt
, key
.data(), key
.size());
28 RangeLockList
*rl
= getOrCreateList();
29 rl
->Append(lock_req
.column_family_id
, &key_dbt
, &key_dbt
);
32 void RangeTreeLockTracker::Track(const RangeLockRequest
&lock_req
) {
33 DBT start_dbt
, end_dbt
;
34 std::string start_key
, end_key
;
36 serialize_endpoint(lock_req
.start_endp
, &start_key
);
37 serialize_endpoint(lock_req
.end_endp
, &end_key
);
39 toku_fill_dbt(&start_dbt
, start_key
.data(), start_key
.size());
40 toku_fill_dbt(&end_dbt
, end_key
.data(), end_key
.size());
42 RangeLockList
*rl
= getOrCreateList();
43 rl
->Append(lock_req
.column_family_id
, &start_dbt
, &end_dbt
);
46 PointLockStatus
RangeTreeLockTracker::GetPointLockStatus(
47 ColumnFamilyId
/*cf_id*/, const std::string
& /*key*/) const {
48 // This function is not expected to be called as RangeTreeLockTracker::
49 // IsPointLockSupported() returns false. Return the status which indicates
50 // the point is not locked.
58 void RangeTreeLockTracker::Clear() { range_list_
.reset(); }
60 void RangeLockList::Append(ColumnFamilyId cf_id
, const DBT
*left_key
,
61 const DBT
*right_key
) {
63 // Only the transaction owner thread calls this function.
64 // The same thread does the lock release, so we can be certain nobody is
65 // releasing the locks concurrently.
66 assert(!releasing_locks_
.load());
67 auto it
= buffers_
.find(cf_id
);
68 if (it
== buffers_
.end()) {
70 it
= buffers_
.emplace(cf_id
, std::make_shared
<toku::range_buffer
>()).first
;
73 it
->second
->append(left_key
, right_key
);
76 void RangeLockList::ReleaseLocks(RangeTreeLockManager
*mgr
,
77 PessimisticTransaction
*txn
,
81 // The lt->release_locks() call below will walk range_list->buffer_. We
82 // need to prevent lock escalation callback from replacing
83 // range_list->buffer_ while we are doing that.
85 // Additional complication here is internal mutex(es) in the locktree
86 // (let's call them latches):
87 // - Lock escalation first obtains latches on the lock tree
88 // - Then, it calls RangeTreeLockManager::on_escalate to replace
89 // transaction's range_list->buffer_. = Access to that buffer must be
90 // synchronized, so it will want to acquire the range_list->mutex_.
92 // While in this function we would want to do the reverse:
93 // - Acquire range_list->mutex_ to prevent access to the range_list.
94 // - Then, lt->release_locks() call will walk through the range_list
95 // - and acquire latches on parts of the lock tree to remove locks from
98 // How do we avoid the deadlock? The idea is that here we set
99 // releasing_locks_=true, and release the mutex.
100 // All other users of the range_list must:
101 // - Acquire the mutex, then check that releasing_locks_=false.
102 // (the code in this function doesnt do that as there's only one thread
103 // that releases transaction's locks)
104 releasing_locks_
.store(true);
107 for (auto it
: buffers_
) {
108 // Don't try to call release_locks() if the buffer is empty! if we are
109 // not holding any locks, the lock tree might be in the STO-mode with
110 // another transaction, and our attempt to release an empty set of locks
111 // will cause an assertion failure.
112 if (it
.second
->get_num_ranges()) {
113 auto lt_ptr
= mgr
->GetLockTreeForCF(it
.first
);
114 toku::locktree
*lt
= lt_ptr
.get();
116 lt
->release_locks((TXNID
)txn
, it
.second
.get(), all_trx_locks
);
118 it
.second
->destroy();
121 toku::lock_request::retry_all_lock_requests(lt
,
122 wait_callback_for_locktree
);
127 releasing_locks_
.store(false);
130 void RangeLockList::ReplaceLocks(const toku::locktree
*lt
,
131 const toku::range_buffer
&buffer
) {
132 MutexLock
l(&mutex_
);
133 if (releasing_locks_
.load()) {
134 // Do nothing. The transaction is releasing its locks, so it will not care
135 // about having a correct list of ranges. (In TokuDB,
136 // toku_db_txn_escalate_callback() makes use of this property, too)
140 ColumnFamilyId cf_id
= (ColumnFamilyId
)lt
->get_dict_id().dictid
;
142 auto it
= buffers_
.find(cf_id
);
143 it
->second
->destroy();
144 it
->second
->create();
146 toku::range_buffer::iterator
iter(&buffer
);
147 toku::range_buffer::iterator::record rec
;
148 while (iter
.current(&rec
)) {
149 it
->second
->append(rec
.get_left_key(), rec
.get_right_key());
154 } // namespace ROCKSDB_NAMESPACE
156 #endif // ROCKSDB_LITE