]>
git.proxmox.com Git - ceph.git/blob - ceph/src/pmdk/src/libpmemobj/sync.c
1 // SPDX-License-Identifier: BSD-3-Clause
2 /* Copyright 2015-2018, Intel Corporation */
5 * sync.c -- persistent memory resident synchronization primitives
16 #include "valgrind_internal.h"
/*
 * RECORD_LOCK -- push a freshly initialized lock onto the pool's per-type
 * list head using a lock-free CAS loop, so the pool can find and release
 * the lock later.  Only the thread that actually initialized the lock
 * (init != 0) records it.
 *
 * NOTE(review): the #ifdef guard lines were lost in extraction; _WIN32 is
 * the reconstructed guard (lock recording is needed where OS primitives
 * must be explicitly destroyed) -- confirm against the original source.
 */
#ifdef _WIN32
#define RECORD_LOCK(init, type, p) \
	if (init) {\
		PMEM##type##_internal *head = pop->type##_head;\
		while (!util_bool_compare_and_swap64(&pop->type##_head, head,\
				p)) {\
			head = pop->type##_head;\
		}\
		p->PMEM##type##_next = head;\
	}
#else
#define RECORD_LOCK(init, type, p)
#endif
/*
 * _get_value -- (internal) atomically initialize and return a value.
 *	Returns -1 on error, 0 if the caller is not the value
 *	initializer, 1 if the caller is the value initializer.
 *
 * The runid field acts as a three-state latch per pool run:
 *	pop_runid     -- value is initialized for this run,
 *	pop_runid - 1 -- some thread is initializing it right now,
 *	anything else -- stale, needs (re)initialization.
 */
static int
_get_value(uint64_t pop_runid, volatile uint64_t *runid, void *value,
	void *arg, int (*init_value)(void *value, void *arg))
{
	uint64_t tmp_runid;
	int initializer = 0;

	while ((tmp_runid = *runid) != pop_runid) {
		/* another thread is initializing -- spin until it finishes */
		if (tmp_runid == pop_runid - 1)
			continue;

		/* try to claim the initializer role */
		if (!util_bool_compare_and_swap64(runid, tmp_runid,
				pop_runid - 1))
			continue;

		initializer = 1;

		if (init_value(value, arg)) {
			ERR("error initializing lock");
			/* release the claim so others may retry */
			util_fetch_and_and64(runid, 0);
			return -1;
		}

		/* publish the value as valid for this pool run */
		if (util_bool_compare_and_swap64(runid, pop_runid - 1,
				pop_runid) == 0) {
			ERR("error setting lock runid");
			return -1;
		}
	}

	return initializer;
}
71 * get_mutex -- (internal) atomically initialize, record and return a mutex
73 static inline os_mutex_t
*
74 get_mutex(PMEMobjpool
*pop
, PMEMmutex_internal
*imp
)
76 if (likely(imp
->pmemmutex
.runid
== pop
->run_id
))
77 return &imp
->PMEMmutex_lock
;
79 volatile uint64_t *runid
= &imp
->pmemmutex
.runid
;
81 LOG(5, "PMEMmutex %p pop->run_id %" PRIu64
" pmemmutex.runid %" PRIu64
,
82 imp
, pop
->run_id
, *runid
);
84 ASSERTeq((uintptr_t)runid
% util_alignof(uint64_t), 0);
86 COMPILE_ERROR_ON(sizeof(PMEMmutex
) != sizeof(PMEMmutex_internal
));
87 COMPILE_ERROR_ON(util_alignof(PMEMmutex
) != util_alignof(os_mutex_t
));
89 VALGRIND_REMOVE_PMEM_MAPPING(imp
, _POBJ_CL_SIZE
);
91 int initializer
= _get_value(pop
->run_id
, runid
, &imp
->PMEMmutex_lock
,
92 NULL
, (void *)os_mutex_init
);
93 if (initializer
== -1) {
97 RECORD_LOCK(initializer
, mutex
, imp
);
99 return &imp
->PMEMmutex_lock
;
103 * get_rwlock -- (internal) atomically initialize, record and return a rwlock
105 static inline os_rwlock_t
*
106 get_rwlock(PMEMobjpool
*pop
, PMEMrwlock_internal
*irp
)
108 if (likely(irp
->pmemrwlock
.runid
== pop
->run_id
))
109 return &irp
->PMEMrwlock_lock
;
111 volatile uint64_t *runid
= &irp
->pmemrwlock
.runid
;
113 LOG(5, "PMEMrwlock %p pop->run_id %"\
114 PRIu64
" pmemrwlock.runid %" PRIu64
,
115 irp
, pop
->run_id
, *runid
);
117 ASSERTeq((uintptr_t)runid
% util_alignof(uint64_t), 0);
119 COMPILE_ERROR_ON(sizeof(PMEMrwlock
) != sizeof(PMEMrwlock_internal
));
120 COMPILE_ERROR_ON(util_alignof(PMEMrwlock
)
121 != util_alignof(os_rwlock_t
));
123 VALGRIND_REMOVE_PMEM_MAPPING(irp
, _POBJ_CL_SIZE
);
125 int initializer
= _get_value(pop
->run_id
, runid
, &irp
->PMEMrwlock_lock
,
126 NULL
, (void *)os_rwlock_init
);
127 if (initializer
== -1) {
131 RECORD_LOCK(initializer
, rwlock
, irp
);
133 return &irp
->PMEMrwlock_lock
;
137 * get_cond -- (internal) atomically initialize, record and return a
140 static inline os_cond_t
*
141 get_cond(PMEMobjpool
*pop
, PMEMcond_internal
*icp
)
143 if (likely(icp
->pmemcond
.runid
== pop
->run_id
))
144 return &icp
->PMEMcond_cond
;
146 volatile uint64_t *runid
= &icp
->pmemcond
.runid
;
148 LOG(5, "PMEMcond %p pop->run_id %" PRIu64
" pmemcond.runid %" PRIu64
,
149 icp
, pop
->run_id
, *runid
);
151 ASSERTeq((uintptr_t)runid
% util_alignof(uint64_t), 0);
153 COMPILE_ERROR_ON(sizeof(PMEMcond
) != sizeof(PMEMcond_internal
));
154 COMPILE_ERROR_ON(util_alignof(PMEMcond
) != util_alignof(os_cond_t
));
156 VALGRIND_REMOVE_PMEM_MAPPING(icp
, _POBJ_CL_SIZE
);
158 int initializer
= _get_value(pop
->run_id
, runid
, &icp
->PMEMcond_cond
,
159 NULL
, (void *)os_cond_init
);
160 if (initializer
== -1) {
164 RECORD_LOCK(initializer
, cond
, icp
);
166 return &icp
->PMEMcond_cond
;
170 * pmemobj_mutex_zero -- zero-initialize a pmem resident mutex
172 * This function is not MT safe.
175 pmemobj_mutex_zero(PMEMobjpool
*pop
, PMEMmutex
*mutexp
)
177 LOG(3, "pop %p mutex %p", pop
, mutexp
);
179 ASSERTeq(pop
, pmemobj_pool_by_ptr(mutexp
));
181 PMEMmutex_internal
*mutexip
= (PMEMmutex_internal
*)mutexp
;
182 mutexip
->pmemmutex
.runid
= 0;
183 pmemops_persist(&pop
->p_ops
, &mutexip
->pmemmutex
.runid
,
184 sizeof(mutexip
->pmemmutex
.runid
));
188 * pmemobj_mutex_lock -- lock a pmem resident mutex
190 * Atomically initializes and locks a PMEMmutex, otherwise behaves as its
194 pmemobj_mutex_lock(PMEMobjpool
*pop
, PMEMmutex
*mutexp
)
196 LOG(3, "pop %p mutex %p", pop
, mutexp
);
198 ASSERTeq(pop
, pmemobj_pool_by_ptr(mutexp
));
200 PMEMmutex_internal
*mutexip
= (PMEMmutex_internal
*)mutexp
;
201 os_mutex_t
*mutex
= get_mutex(pop
, mutexip
);
206 ASSERTeq((uintptr_t)mutex
% util_alignof(os_mutex_t
), 0);
208 return os_mutex_lock(mutex
);
212 * pmemobj_mutex_assert_locked -- checks whether mutex is locked.
214 * Returns 0 when mutex is locked.
217 pmemobj_mutex_assert_locked(PMEMobjpool
*pop
, PMEMmutex
*mutexp
)
219 LOG(3, "pop %p mutex %p", pop
, mutexp
);
221 ASSERTeq(pop
, pmemobj_pool_by_ptr(mutexp
));
223 PMEMmutex_internal
*mutexip
= (PMEMmutex_internal
*)mutexp
;
224 os_mutex_t
*mutex
= get_mutex(pop
, mutexip
);
228 ASSERTeq((uintptr_t)mutex
% util_alignof(os_mutex_t
), 0);
230 int ret
= os_mutex_trylock(mutex
);
234 util_mutex_unlock(mutex
);
236 * There's no good error code for this case. EINVAL is used for
237 * something else here.
245 * pmemobj_mutex_timedlock -- lock a pmem resident mutex
247 * Atomically initializes and locks a PMEMmutex, otherwise behaves as its
251 pmemobj_mutex_timedlock(PMEMobjpool
*pop
, PMEMmutex
*__restrict mutexp
,
252 const struct timespec
*__restrict abs_timeout
)
254 LOG(3, "pop %p mutex %p", pop
, mutexp
);
256 ASSERTeq(pop
, pmemobj_pool_by_ptr(mutexp
));
258 PMEMmutex_internal
*mutexip
= (PMEMmutex_internal
*)mutexp
;
259 os_mutex_t
*mutex
= get_mutex(pop
, mutexip
);
263 ASSERTeq((uintptr_t)mutex
% util_alignof(os_mutex_t
), 0);
265 return os_mutex_timedlock(mutex
, abs_timeout
);
269 * pmemobj_mutex_trylock -- trylock a pmem resident mutex
271 * Atomically initializes and trylocks a PMEMmutex, otherwise behaves as its
275 pmemobj_mutex_trylock(PMEMobjpool
*pop
, PMEMmutex
*mutexp
)
277 LOG(3, "pop %p mutex %p", pop
, mutexp
);
279 ASSERTeq(pop
, pmemobj_pool_by_ptr(mutexp
));
281 PMEMmutex_internal
*mutexip
= (PMEMmutex_internal
*)mutexp
;
282 os_mutex_t
*mutex
= get_mutex(pop
, mutexip
);
286 ASSERTeq((uintptr_t)mutex
% util_alignof(os_mutex_t
), 0);
288 return os_mutex_trylock(mutex
);
292 * pmemobj_mutex_unlock -- unlock a pmem resident mutex
295 pmemobj_mutex_unlock(PMEMobjpool
*pop
, PMEMmutex
*mutexp
)
297 LOG(3, "pop %p mutex %p", pop
, mutexp
);
299 ASSERTeq(pop
, pmemobj_pool_by_ptr(mutexp
));
301 /* XXX potential performance improvement - move GET to debug version */
302 PMEMmutex_internal
*mutexip
= (PMEMmutex_internal
*)mutexp
;
303 os_mutex_t
*mutex
= get_mutex(pop
, mutexip
);
307 ASSERTeq((uintptr_t)mutex
% util_alignof(os_mutex_t
), 0);
309 return os_mutex_unlock(mutex
);
313 * pmemobj_rwlock_zero -- zero-initialize a pmem resident rwlock
315 * This function is not MT safe.
318 pmemobj_rwlock_zero(PMEMobjpool
*pop
, PMEMrwlock
*rwlockp
)
320 LOG(3, "pop %p rwlock %p", pop
, rwlockp
);
322 ASSERTeq(pop
, pmemobj_pool_by_ptr(rwlockp
));
324 PMEMrwlock_internal
*rwlockip
= (PMEMrwlock_internal
*)rwlockp
;
325 rwlockip
->pmemrwlock
.runid
= 0;
326 pmemops_persist(&pop
->p_ops
, &rwlockip
->pmemrwlock
.runid
,
327 sizeof(rwlockip
->pmemrwlock
.runid
));
331 * pmemobj_rwlock_rdlock -- rdlock a pmem resident mutex
333 * Atomically initializes and rdlocks a PMEMrwlock, otherwise behaves as its
337 pmemobj_rwlock_rdlock(PMEMobjpool
*pop
, PMEMrwlock
*rwlockp
)
339 LOG(3, "pop %p rwlock %p", pop
, rwlockp
);
341 ASSERTeq(pop
, pmemobj_pool_by_ptr(rwlockp
));
343 PMEMrwlock_internal
*rwlockip
= (PMEMrwlock_internal
*)rwlockp
;
344 os_rwlock_t
*rwlock
= get_rwlock(pop
, rwlockip
);
348 ASSERTeq((uintptr_t)rwlock
% util_alignof(os_rwlock_t
), 0);
350 return os_rwlock_rdlock(rwlock
);
354 * pmemobj_rwlock_wrlock -- wrlock a pmem resident mutex
356 * Atomically initializes and wrlocks a PMEMrwlock, otherwise behaves as its
360 pmemobj_rwlock_wrlock(PMEMobjpool
*pop
, PMEMrwlock
*rwlockp
)
362 LOG(3, "pop %p rwlock %p", pop
, rwlockp
);
364 ASSERTeq(pop
, pmemobj_pool_by_ptr(rwlockp
));
366 PMEMrwlock_internal
*rwlockip
= (PMEMrwlock_internal
*)rwlockp
;
367 os_rwlock_t
*rwlock
= get_rwlock(pop
, rwlockip
);
371 ASSERTeq((uintptr_t)rwlock
% util_alignof(os_rwlock_t
), 0);
373 return os_rwlock_wrlock(rwlock
);
377 * pmemobj_rwlock_timedrdlock -- timedrdlock a pmem resident mutex
379 * Atomically initializes and timedrdlocks a PMEMrwlock, otherwise behaves as
380 * its POSIX counterpart.
383 pmemobj_rwlock_timedrdlock(PMEMobjpool
*pop
, PMEMrwlock
*__restrict rwlockp
,
384 const struct timespec
*__restrict abs_timeout
)
386 LOG(3, "pop %p rwlock %p timeout sec %ld nsec %ld", pop
, rwlockp
,
387 abs_timeout
->tv_sec
, abs_timeout
->tv_nsec
);
389 ASSERTeq(pop
, pmemobj_pool_by_ptr(rwlockp
));
391 PMEMrwlock_internal
*rwlockip
= (PMEMrwlock_internal
*)rwlockp
;
392 os_rwlock_t
*rwlock
= get_rwlock(pop
, rwlockip
);
396 ASSERTeq((uintptr_t)rwlock
% util_alignof(os_rwlock_t
), 0);
398 return os_rwlock_timedrdlock(rwlock
, abs_timeout
);
402 * pmemobj_rwlock_timedwrlock -- timedwrlock a pmem resident mutex
404 * Atomically initializes and timedwrlocks a PMEMrwlock, otherwise behaves as
405 * its POSIX counterpart.
408 pmemobj_rwlock_timedwrlock(PMEMobjpool
*pop
, PMEMrwlock
*__restrict rwlockp
,
409 const struct timespec
*__restrict abs_timeout
)
411 LOG(3, "pop %p rwlock %p timeout sec %ld nsec %ld", pop
, rwlockp
,
412 abs_timeout
->tv_sec
, abs_timeout
->tv_nsec
);
414 ASSERTeq(pop
, pmemobj_pool_by_ptr(rwlockp
));
416 PMEMrwlock_internal
*rwlockip
= (PMEMrwlock_internal
*)rwlockp
;
417 os_rwlock_t
*rwlock
= get_rwlock(pop
, rwlockip
);
421 ASSERTeq((uintptr_t)rwlock
% util_alignof(os_rwlock_t
), 0);
423 return os_rwlock_timedwrlock(rwlock
, abs_timeout
);
427 * pmemobj_rwlock_tryrdlock -- tryrdlock a pmem resident mutex
429 * Atomically initializes and tryrdlocks a PMEMrwlock, otherwise behaves as its
433 pmemobj_rwlock_tryrdlock(PMEMobjpool
*pop
, PMEMrwlock
*rwlockp
)
435 LOG(3, "pop %p rwlock %p", pop
, rwlockp
);
437 ASSERTeq(pop
, pmemobj_pool_by_ptr(rwlockp
));
439 PMEMrwlock_internal
*rwlockip
= (PMEMrwlock_internal
*)rwlockp
;
440 os_rwlock_t
*rwlock
= get_rwlock(pop
, rwlockip
);
444 ASSERTeq((uintptr_t)rwlock
% util_alignof(os_rwlock_t
), 0);
446 return os_rwlock_tryrdlock(rwlock
);
450 * pmemobj_rwlock_trywrlock -- trywrlock a pmem resident mutex
452 * Atomically initializes and trywrlocks a PMEMrwlock, otherwise behaves as its
456 pmemobj_rwlock_trywrlock(PMEMobjpool
*pop
, PMEMrwlock
*rwlockp
)
458 LOG(3, "pop %p rwlock %p", pop
, rwlockp
);
460 ASSERTeq(pop
, pmemobj_pool_by_ptr(rwlockp
));
462 PMEMrwlock_internal
*rwlockip
= (PMEMrwlock_internal
*)rwlockp
;
463 os_rwlock_t
*rwlock
= get_rwlock(pop
, rwlockip
);
467 ASSERTeq((uintptr_t)rwlock
% util_alignof(os_rwlock_t
), 0);
469 return os_rwlock_trywrlock(rwlock
);
473 * pmemobj_rwlock_unlock -- unlock a pmem resident rwlock
476 pmemobj_rwlock_unlock(PMEMobjpool
*pop
, PMEMrwlock
*rwlockp
)
478 LOG(3, "pop %p rwlock %p", pop
, rwlockp
);
480 ASSERTeq(pop
, pmemobj_pool_by_ptr(rwlockp
));
482 /* XXX potential performance improvement - move GET to debug version */
483 PMEMrwlock_internal
*rwlockip
= (PMEMrwlock_internal
*)rwlockp
;
484 os_rwlock_t
*rwlock
= get_rwlock(pop
, rwlockip
);
488 ASSERTeq((uintptr_t)rwlock
% util_alignof(os_rwlock_t
), 0);
490 return os_rwlock_unlock(rwlock
);
494 * pmemobj_cond_zero -- zero-initialize a pmem resident condition variable
496 * This function is not MT safe.
499 pmemobj_cond_zero(PMEMobjpool
*pop
, PMEMcond
*condp
)
501 LOG(3, "pop %p cond %p", pop
, condp
);
503 ASSERTeq(pop
, pmemobj_pool_by_ptr(condp
));
505 PMEMcond_internal
*condip
= (PMEMcond_internal
*)condp
;
506 condip
->pmemcond
.runid
= 0;
507 pmemops_persist(&pop
->p_ops
, &condip
->pmemcond
.runid
,
508 sizeof(condip
->pmemcond
.runid
));
512 * pmemobj_cond_broadcast -- broadcast a pmem resident condition variable
514 * Atomically initializes and broadcast a PMEMcond, otherwise behaves as its
518 pmemobj_cond_broadcast(PMEMobjpool
*pop
, PMEMcond
*condp
)
520 LOG(3, "pop %p cond %p", pop
, condp
);
522 ASSERTeq(pop
, pmemobj_pool_by_ptr(condp
));
524 PMEMcond_internal
*condip
= (PMEMcond_internal
*)condp
;
525 os_cond_t
*cond
= get_cond(pop
, condip
);
529 ASSERTeq((uintptr_t)cond
% util_alignof(os_cond_t
), 0);
531 return os_cond_broadcast(cond
);
535 * pmemobj_cond_signal -- signal a pmem resident condition variable
537 * Atomically initializes and signal a PMEMcond, otherwise behaves as its
541 pmemobj_cond_signal(PMEMobjpool
*pop
, PMEMcond
*condp
)
543 LOG(3, "pop %p cond %p", pop
, condp
);
545 ASSERTeq(pop
, pmemobj_pool_by_ptr(condp
));
547 PMEMcond_internal
*condip
= (PMEMcond_internal
*)condp
;
548 os_cond_t
*cond
= get_cond(pop
, condip
);
552 ASSERTeq((uintptr_t)cond
% util_alignof(os_cond_t
), 0);
554 return os_cond_signal(cond
);
558 * pmemobj_cond_timedwait -- timedwait on a pmem resident condition variable
560 * Atomically initializes and timedwait on a PMEMcond, otherwise behaves as its
564 pmemobj_cond_timedwait(PMEMobjpool
*pop
, PMEMcond
*__restrict condp
,
565 PMEMmutex
*__restrict mutexp
,
566 const struct timespec
*__restrict abs_timeout
)
568 LOG(3, "pop %p cond %p mutex %p abstime sec %ld nsec %ld", pop
, condp
,
569 mutexp
, abs_timeout
->tv_sec
, abs_timeout
->tv_nsec
);
571 ASSERTeq(pop
, pmemobj_pool_by_ptr(mutexp
));
572 ASSERTeq(pop
, pmemobj_pool_by_ptr(condp
));
574 PMEMcond_internal
*condip
= (PMEMcond_internal
*)condp
;
575 PMEMmutex_internal
*mutexip
= (PMEMmutex_internal
*)mutexp
;
576 os_cond_t
*cond
= get_cond(pop
, condip
);
577 os_mutex_t
*mutex
= get_mutex(pop
, mutexip
);
578 if ((cond
== NULL
) || (mutex
== NULL
))
581 ASSERTeq((uintptr_t)mutex
% util_alignof(os_mutex_t
), 0);
582 ASSERTeq((uintptr_t)cond
% util_alignof(os_cond_t
), 0);
584 return os_cond_timedwait(cond
, mutex
, abs_timeout
);
588 * pmemobj_cond_wait -- wait on a pmem resident condition variable
590 * Atomically initializes and wait on a PMEMcond, otherwise behaves as its
594 pmemobj_cond_wait(PMEMobjpool
*pop
, PMEMcond
*condp
,
595 PMEMmutex
*__restrict mutexp
)
597 LOG(3, "pop %p cond %p mutex %p", pop
, condp
, mutexp
);
599 ASSERTeq(pop
, pmemobj_pool_by_ptr(mutexp
));
600 ASSERTeq(pop
, pmemobj_pool_by_ptr(condp
));
602 PMEMcond_internal
*condip
= (PMEMcond_internal
*)condp
;
603 PMEMmutex_internal
*mutexip
= (PMEMmutex_internal
*)mutexp
;
604 os_cond_t
*cond
= get_cond(pop
, condip
);
605 os_mutex_t
*mutex
= get_mutex(pop
, mutexip
);
606 if ((cond
== NULL
) || (mutex
== NULL
))
609 ASSERTeq((uintptr_t)mutex
% util_alignof(os_mutex_t
), 0);
610 ASSERTeq((uintptr_t)cond
% util_alignof(os_cond_t
), 0);
612 return os_cond_wait(cond
, mutex
);
616 * pmemobj_volatile -- atomically initialize, record and return a
620 pmemobj_volatile(PMEMobjpool
*pop
, struct pmemvlt
*vlt
,
621 void *ptr
, size_t size
,
622 int (*constr
)(void *ptr
, void *arg
), void *arg
)
624 LOG(3, "pop %p vlt %p ptr %p constr %p arg %p", pop
, vlt
, ptr
,
627 if (likely(vlt
->runid
== pop
->run_id
))
630 VALGRIND_REMOVE_PMEM_MAPPING(ptr
, size
);
632 VALGRIND_ADD_TO_TX(vlt
, sizeof(*vlt
));
633 if (_get_value(pop
->run_id
, &vlt
->runid
, ptr
, arg
, constr
) < 0) {
634 VALGRIND_REMOVE_FROM_TX(vlt
, sizeof(*vlt
));
638 VALGRIND_REMOVE_FROM_TX(vlt
, sizeof(*vlt
));
639 VALGRIND_SET_CLEAN(vlt
, sizeof(*vlt
));