]> git.proxmox.com Git - ceph.git/blame - ceph/src/rocksdb/port/port_posix.cc
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / rocksdb / port / port_posix.cc
CommitLineData
7c673cae 1// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
11fdf7f2
TL
2// This source code is licensed under both the GPLv2 (found in the
3// COPYING file in the root directory) and Apache 2.0 License
4// (found in the LICENSE.Apache file in the root directory).
7c673cae
FG
5//
6// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7// Use of this source code is governed by a BSD-style license that can be
8// found in the LICENSE file. See the AUTHORS file for names of contributors.
9
20effc67
TL
10#if !defined(OS_WIN)
11
7c673cae
FG
12#include "port/port_posix.h"
13
14#include <assert.h>
15#if defined(__i386__) || defined(__x86_64__)
16#include <cpuid.h>
17#endif
18#include <errno.h>
11fdf7f2 19#include <sched.h>
7c673cae
FG
20#include <signal.h>
21#include <stdio.h>
22#include <string.h>
7c673cae 23#include <sys/resource.h>
f67539c2 24#include <sys/time.h>
7c673cae 25#include <unistd.h>
1e59de90 26
7c673cae 27#include <cstdlib>
1e59de90
TL
28#include <fstream>
29#include <string>
30
31#include "util/string_util.h"
7c673cae 32
f67539c2 33namespace ROCKSDB_NAMESPACE {
494da23a
TL
34
// Gives users a way to make every mutex adaptive unless a call site says
// otherwise, which is a quick way to run performance experiments.
//
// NB! Support for adaptive mutexes is turned on by defining
// ROCKSDB_PTHREAD_ADAPTIVE_MUTEX during the compilation. If you use the
// RocksDB build environment this happens automatically; otherwise it is up to
// the consumer to define the identifier.
extern const bool kDefaultToAdaptiveMutex =
#ifdef ROCKSDB_DEFAULT_TO_ADAPTIVE_MUTEX
    true;
#else
    false;
#endif
48
7c673cae
FG
49namespace port {
50
51static int PthreadCall(const char* label, int result) {
1e59de90
TL
52 if (result != 0 && result != ETIMEDOUT && result != EBUSY) {
53 fprintf(stderr, "pthread %s: %s\n", label, errnoStr(result).c_str());
7c673cae
FG
54 abort();
55 }
56 return result;
57}
58
59Mutex::Mutex(bool adaptive) {
1e59de90 60 (void)adaptive;
7c673cae
FG
61#ifdef ROCKSDB_PTHREAD_ADAPTIVE_MUTEX
62 if (!adaptive) {
63 PthreadCall("init mutex", pthread_mutex_init(&mu_, nullptr));
64 } else {
65 pthread_mutexattr_t mutex_attr;
66 PthreadCall("init mutex attr", pthread_mutexattr_init(&mutex_attr));
1e59de90
TL
67 PthreadCall("set mutex attr", pthread_mutexattr_settype(
68 &mutex_attr, PTHREAD_MUTEX_ADAPTIVE_NP));
7c673cae 69 PthreadCall("init mutex", pthread_mutex_init(&mu_, &mutex_attr));
1e59de90 70 PthreadCall("destroy mutex attr", pthread_mutexattr_destroy(&mutex_attr));
7c673cae
FG
71 }
72#else
73 PthreadCall("init mutex", pthread_mutex_init(&mu_, nullptr));
1e59de90 74#endif // ROCKSDB_PTHREAD_ADAPTIVE_MUTEX
7c673cae
FG
75}
76
77Mutex::~Mutex() { PthreadCall("destroy mutex", pthread_mutex_destroy(&mu_)); }
78
79void Mutex::Lock() {
80 PthreadCall("lock", pthread_mutex_lock(&mu_));
81#ifndef NDEBUG
82 locked_ = true;
83#endif
84}
85
86void Mutex::Unlock() {
87#ifndef NDEBUG
88 locked_ = false;
89#endif
90 PthreadCall("unlock", pthread_mutex_unlock(&mu_));
91}
92
1e59de90
TL
93bool Mutex::TryLock() {
94 bool ret = PthreadCall("trylock", pthread_mutex_trylock(&mu_)) == 0;
95#ifndef NDEBUG
96 if (ret) {
97 locked_ = true;
98 }
99#endif
100 return ret;
101}
102
7c673cae
FG
103void Mutex::AssertHeld() {
104#ifndef NDEBUG
105 assert(locked_);
106#endif
107}
108
1e59de90
TL
109CondVar::CondVar(Mutex* mu) : mu_(mu) {
110 PthreadCall("init cv", pthread_cond_init(&cv_, nullptr));
7c673cae
FG
111}
112
113CondVar::~CondVar() { PthreadCall("destroy cv", pthread_cond_destroy(&cv_)); }
114
115void CondVar::Wait() {
116#ifndef NDEBUG
117 mu_->locked_ = false;
118#endif
119 PthreadCall("wait", pthread_cond_wait(&cv_, &mu_->mu_));
120#ifndef NDEBUG
121 mu_->locked_ = true;
122#endif
123}
124
125bool CondVar::TimedWait(uint64_t abs_time_us) {
126 struct timespec ts;
127 ts.tv_sec = static_cast<time_t>(abs_time_us / 1000000);
128 ts.tv_nsec = static_cast<suseconds_t>((abs_time_us % 1000000) * 1000);
129
130#ifndef NDEBUG
131 mu_->locked_ = false;
132#endif
133 int err = pthread_cond_timedwait(&cv_, &mu_->mu_, &ts);
134#ifndef NDEBUG
135 mu_->locked_ = true;
136#endif
137 if (err == ETIMEDOUT) {
138 return true;
139 }
140 if (err != 0) {
141 PthreadCall("timedwait", err);
142 }
143 return false;
144}
145
1e59de90 146void CondVar::Signal() { PthreadCall("signal", pthread_cond_signal(&cv_)); }
7c673cae
FG
147
148void CondVar::SignalAll() {
149 PthreadCall("broadcast", pthread_cond_broadcast(&cv_));
150}
151
152RWMutex::RWMutex() {
153 PthreadCall("init mutex", pthread_rwlock_init(&mu_, nullptr));
154}
155
1e59de90
TL
156RWMutex::~RWMutex() {
157 PthreadCall("destroy mutex", pthread_rwlock_destroy(&mu_));
158}
7c673cae 159
1e59de90
TL
160void RWMutex::ReadLock() {
161 PthreadCall("read lock", pthread_rwlock_rdlock(&mu_));
162}
7c673cae 163
1e59de90
TL
164void RWMutex::WriteLock() {
165 PthreadCall("write lock", pthread_rwlock_wrlock(&mu_));
166}
7c673cae 167
1e59de90
TL
168void RWMutex::ReadUnlock() {
169 PthreadCall("read unlock", pthread_rwlock_unlock(&mu_));
170}
7c673cae 171
1e59de90
TL
172void RWMutex::WriteUnlock() {
173 PthreadCall("write unlock", pthread_rwlock_unlock(&mu_));
174}
7c673cae
FG
175
// Returns an identifier for the core the calling thread is running on, or -1
// when none can be determined on this platform.
int PhysicalCoreID() {
#if defined(ROCKSDB_SCHED_GETCPU_PRESENT) && defined(__x86_64__) && \
    (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 22))
  // sched_getcpu uses VDSO getcpu() syscall since 2.22. I believe Linux offers
  // VDSO support only on x86_64. This is the fastest/preferred method if
  // available.
  // NOTE(review): the guard above tests the compiler version (__GNUC__),
  // while "2.22" presumably refers to the C library version - confirm whether
  // __GLIBC__/__GLIBC_MINOR__ was intended.
  const int current_cpu = sched_getcpu();
  return current_cpu < 0 ? -1 : current_cpu;
#elif defined(__x86_64__) || defined(__i386__)
  // clang/gcc both provide cpuid.h, which defines __get_cpuid(), for x86_64 and
  // i386.
  unsigned eax, ebx = 0, ecx, edx;
  if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
    // The identifier is taken from the top eight bits of EBX for leaf 1.
    return ebx >> 24;
  }
  return -1;
#else
  // give up, the caller can generate a random number or something.
  return -1;
#endif
}
200
201void InitOnce(OnceType* once, void (*initializer)()) {
202 PthreadCall("once", pthread_once(once, initializer));
203}
204
// Prints the crash location (`srcfile`:`srcline`) to stdout, flushes it, and
// then sends SIGTERM to the current process.
void Crash(const std::string& srcfile, int srcline) {
  const char* const file_name = srcfile.c_str();
  fprintf(stdout, "Crashing at %s:%d\n", file_name, srcline);
  // Flush explicitly so the message is written before the signal is sent.
  fflush(stdout);
  const pid_t self = getpid();
  kill(self, SIGTERM);
}
210
// Returns the soft limit on open file descriptors (RLIMIT_NOFILE), clamped to
// the largest value an int can hold. Returns -1 if the limit cannot be
// queried or RLIMIT_NOFILE is not available on this platform.
int GetMaxOpenFiles() {
#if defined(RLIMIT_NOFILE)
  struct rlimit nofile;
  if (getrlimit(RLIMIT_NOFILE, &nofile) != 0) {
    return -1;
  }
  // protect against overflow: rlim_cur may be wider than int.
  constexpr int kIntMax = std::numeric_limits<int>::max();
  const uintmax_t soft_limit = static_cast<uintmax_t>(nofile.rlim_cur);
  if (soft_limit >= static_cast<uintmax_t>(kIntMax)) {
    return kIntMax;
  }
  return static_cast<int>(nofile.rlim_cur);
#else
  return -1;
#endif
}
226
1e59de90 227void* cacheline_aligned_alloc(size_t size) {
11fdf7f2
TL
228#if __GNUC__ < 5 && defined(__SANITIZE_ADDRESS__)
229 return malloc(size);
1e59de90
TL
230#elif (_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 || defined(__APPLE__))
231 void* m;
11fdf7f2
TL
232 errno = posix_memalign(&m, CACHE_LINE_SIZE, size);
233 return errno ? nullptr : m;
234#else
235 return malloc(size);
236#endif
237}
238
// Releases `memblock` with free().
void cacheline_aligned_free(void* memblock) {
  std::free(memblock);
}
11fdf7f2 240
f67539c2
TL
// Queries the page size through sysconf(_SC_PAGESIZE) where available.
// Values below 1024 (including the -1 error return) are ignored and a 4KB
// default is used instead.
static size_t GetPageSize() {
  // Default assume 4KB
  size_t page_size = 4U * 1024U;
#if defined(OS_LINUX) || defined(_SC_PAGESIZE)
  const long reported = sysconf(_SC_PAGESIZE);
  if (reported >= 1024) {
    page_size = static_cast<size_t>(reported);
  }
#endif
  return page_size;
}

// Page size computed once during static initialization.
const size_t kPageSize = GetPageSize();
11fdf7f2 253
20effc67
TL
254void SetCpuPriority(ThreadId id, CpuPriority priority) {
255#ifdef OS_LINUX
256 sched_param param;
257 param.sched_priority = 0;
258 switch (priority) {
259 case CpuPriority::kHigh:
260 sched_setscheduler(id, SCHED_OTHER, &param);
261 setpriority(PRIO_PROCESS, id, -20);
262 break;
263 case CpuPriority::kNormal:
264 sched_setscheduler(id, SCHED_OTHER, &param);
265 setpriority(PRIO_PROCESS, id, 0);
266 break;
267 case CpuPriority::kLow:
268 sched_setscheduler(id, SCHED_OTHER, &param);
269 setpriority(PRIO_PROCESS, id, 19);
270 break;
271 case CpuPriority::kIdle:
272 sched_setscheduler(id, SCHED_IDLE, &param);
273 break;
274 default:
275 assert(false);
276 }
277#else
278 (void)id;
279 (void)priority;
280#endif
281}
282
1e59de90
TL
// Returns the current process id (getpid()) widened to int64_t.
int64_t GetProcessID() {
  return static_cast<int64_t>(getpid());
}
284
// Reads one line from /proc/sys/kernel/random/uuid into `*output`.
//
// Returns true when the line is exactly 36 characters long. Otherwise
// `*output` is left empty and false is returned (for example when the file
// cannot be read).
bool GenerateRfcUuid(std::string* output) {
  constexpr size_t kExpectedLength = 36;
  output->clear();
  std::ifstream uuid_source("/proc/sys/kernel/random/uuid");
  std::getline(uuid_source, *output);
  if (output->size() != kExpectedLength) {
    output->clear();
    return false;
  }
  return true;
}
296
7c673cae 297} // namespace port
f67539c2 298} // namespace ROCKSDB_NAMESPACE
20effc67
TL
299
300#endif