]> git.proxmox.com Git - ceph.git/blob - ceph/src/seastar/src/core/systemwide_memory_barrier.cc
update source to Ceph Pacific 16.2.2
[ceph.git] / ceph / src / seastar / src / core / systemwide_memory_barrier.cc
1 /*
2 * This file is open source software, licensed to you under the terms
3 * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
4 * distributed with this work for additional information regarding copyright
5 * ownership. You may not use this file except in compliance with the License.
6 *
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing,
12 * software distributed under the License is distributed on an
13 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 * KIND, either express or implied. See the License for the
15 * specific language governing permissions and limitations
16 * under the License.
17 */
18 /*
19 * Copyright 2015 Scylla DB
20 */
21
#include <seastar/core/systemwide_memory_barrier.hh>
#include <seastar/core/cacheline.hh>
#include <seastar/util/log.hh>
#include <sys/mman.h>
#include <unistd.h>
#include <atomic>
#include <cassert>
#include <cerrno>
#include <cstdio>
#include <cstdlib>
#include <mutex>
30
31 #if SEASTAR_HAS_MEMBARRIER
32 #include <linux/membarrier.h>
33 #include <sys/syscall.h>
34 #include <unistd.h>
35 #endif
36
37 namespace seastar {
38
39
40 #ifdef SEASTAR_HAS_MEMBARRIER
41
// True when membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED) can be used by this
// process. Evaluated once, during static initialization:
//   1. query the kernel for the set of supported membarrier commands;
//   2. require both PRIVATE_EXPEDITED and its REGISTER counterpart;
//   3. register the process, and only report success if that worked.
static bool has_native_membarrier = [] {
    const auto supported = syscall(SYS_membarrier, MEMBARRIER_CMD_QUERY, 0);
    if (supported == -1) {
        // The query itself failed, so membarrier() cannot be used.
        return false;
    }
    const int needed = MEMBARRIER_CMD_PRIVATE_EXPEDITED | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED;
    if ((supported & needed) != needed) {
        return false;
    }
    // A process has to register before it may issue PRIVATE_EXPEDITED;
    // an unregistered caller gets EPERM and no barrier is performed. If the
    // registration fails we must therefore report "not available" so that
    // callers fall back to another mechanism instead of silently skipping
    // the barrier.
    return syscall(SYS_membarrier, MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, 0) == 0;
}();
54
55 static bool try_native_membarrier() {
56 if (has_native_membarrier) {
57 syscall(SYS_membarrier, MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0);
58 return true;
59 }
60 return false;
61 }
62
63 #else
64
// Stub used when the build has no membarrier() support: there is no native
// barrier to issue, so always tell the caller to use a fallback.
static bool try_native_membarrier() { return false; }
68
69 #endif
70
71 // cause all threads to invoke a full memory barrier
72 void
73 systemwide_memory_barrier() {
74 if (try_native_membarrier()) {
75 return;
76 }
77
78 // FIXME: use sys_membarrier() when available
79 static thread_local char* mem = [] {
80 void* mem = mmap(nullptr, getpagesize(),
81 PROT_READ | PROT_WRITE,
82 MAP_PRIVATE | MAP_ANONYMOUS,
83 -1, 0) ;
84 assert(mem != MAP_FAILED);
85
86 // If the user specified --lock-memory, then madvise() below will fail
87 // with EINVAL, so we unlock here:
88 auto r = munlock(mem, getpagesize());
89 // munlock may fail on old kernels if we don't have permission. That's not
90 // a problem, since if we don't have permission to unlock, we didn't have
91 // permissions to lock.
92 assert(r == 0 || errno == EPERM);
93
94 return reinterpret_cast<char*>(mem);
95 }();
96 // Force page into memory to make madvise() have real work to do
97 *mem = 3;
98 // Evict page to force kernel to send IPI to all threads, with
99 // a side effect of executing a memory barrier on those threads
100 // FIXME: does this work on ARM?
101 int r2 = madvise(mem, getpagesize(), MADV_DONTNEED);
102 assert(r2 == 0);
103 }
104
105 bool try_systemwide_memory_barrier() {
106 if (try_native_membarrier()) {
107 return true;
108 }
109
110 #ifdef __aarch64__
111
112 // Some (not all) ARM processors can broadcast TLB invalidations using the
113 // TLBI instruction. On those, the mprotect trick won't work.
114 static std::once_flag warn_once;
115 extern logger seastar_logger;
116 std::call_once(warn_once, [] {
117 seastar_logger.warn("membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED) is not available, reactor will not sleep when idle. Upgrade to Linux 4.14 or later");
118 });
119
120 return false;
121
122 #endif
123
124 systemwide_memory_barrier();
125 return true;
126 }
127
128 }
129