]> git.proxmox.com Git - ceph.git/blame - ceph/src/seastar/include/seastar/core/posix.hh
update source to Ceph Pacific 16.2.2
[ceph.git] / ceph / src / seastar / include / seastar / core / posix.hh
CommitLineData
11fdf7f2
TL
1/*
2 * This file is open source software, licensed to you under the terms
3 * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
4 * distributed with this work for additional information regarding copyright
5 * ownership. You may not use this file except in compliance with the License.
6 *
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing,
12 * software distributed under the License is distributed on an
13 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 * KIND, either express or implied. See the License for the
15 * specific language governing permissions and limitations
16 * under the License.
17 */
18/*
19 * Copyright (C) 2016 ScyllaDB
20 */
21
22#pragma once
23
24#include <seastar/core/sstring.hh>
9f95a23c 25#include "abort_on_ebadf.hh"
11fdf7f2
TL
26#include <sys/types.h>
27#include <sys/stat.h>
28#include <unistd.h>
29#include <assert.h>
30#include <utility>
31#include <fcntl.h>
32#include <sys/ioctl.h>
33#include <sys/eventfd.h>
34#include <sys/timerfd.h>
35#include <sys/socket.h>
36#include <sys/epoll.h>
37#include <sys/mman.h>
38#include <signal.h>
39#include <system_error>
11fdf7f2
TL
40#include <pthread.h>
41#include <signal.h>
42#include <memory>
43#include <chrono>
44#include <sys/uio.h>
45
46#include <seastar/net/socket_defs.hh>
9f95a23c 47#include <seastar/util/std-compat.hh>
11fdf7f2
TL
48
49namespace seastar {
50
51/// \file
52/// \defgroup posix-support POSIX Support
53///
54/// Mostly-internal APIs to provide C++ glue for the underlying POSIX platform;
55/// but can be used by the application when they don't block.
56///
57/// \addtogroup posix-support
58/// @{
59
60inline void throw_system_error_on(bool condition, const char* what_arg = "");
61
62template <typename T>
63inline void throw_kernel_error(T r);
64
65struct mmap_deleter {
66 size_t _size;
67 void operator()(void* ptr) const;
68};
69
70using mmap_area = std::unique_ptr<char[], mmap_deleter>;
71
72mmap_area mmap_anonymous(void* addr, size_t length, int prot, int flags);
73
74class file_desc {
75 int _fd;
76public:
77 file_desc() = delete;
78 file_desc(const file_desc&) = delete;
f67539c2 79 file_desc(file_desc&& x) noexcept : _fd(x._fd) { x._fd = -1; }
11fdf7f2
TL
80 ~file_desc() { if (_fd != -1) { ::close(_fd); } }
81 void operator=(const file_desc&) = delete;
82 file_desc& operator=(file_desc&& x) {
83 if (this != &x) {
84 std::swap(_fd, x._fd);
85 if (x._fd != -1) {
86 x.close();
87 }
88 }
89 return *this;
90 }
91 void close() {
92 assert(_fd != -1);
93 auto r = ::close(_fd);
94 throw_system_error_on(r == -1, "close");
95 _fd = -1;
96 }
97 int get() const { return _fd; }
9f95a23c
TL
98
99 static file_desc from_fd(int fd) {
100 return file_desc(fd);
101 }
102
11fdf7f2
TL
103 static file_desc open(sstring name, int flags, mode_t mode = 0) {
104 int fd = ::open(name.c_str(), flags, mode);
105 throw_system_error_on(fd == -1, "open");
106 return file_desc(fd);
107 }
108 static file_desc socket(int family, int type, int protocol = 0) {
109 int fd = ::socket(family, type, protocol);
110 throw_system_error_on(fd == -1, "socket");
111 return file_desc(fd);
112 }
113 static file_desc eventfd(unsigned initval, int flags) {
114 int fd = ::eventfd(initval, flags);
115 throw_system_error_on(fd == -1, "eventfd");
116 return file_desc(fd);
117 }
118 static file_desc epoll_create(int flags = 0) {
119 int fd = ::epoll_create1(flags);
120 throw_system_error_on(fd == -1, "epoll_create1");
121 return file_desc(fd);
122 }
123 static file_desc timerfd_create(int clockid, int flags) {
124 int fd = ::timerfd_create(clockid, flags);
125 throw_system_error_on(fd == -1, "timerfd_create");
126 return file_desc(fd);
127 }
128 static file_desc temporary(sstring directory);
129 file_desc dup() const {
130 int fd = ::dup(get());
131 throw_system_error_on(fd == -1, "dup");
132 return file_desc(fd);
133 }
9f95a23c
TL
134 file_desc accept(socket_address& sa, int flags = 0) {
135 auto ret = ::accept4(_fd, &sa.as_posix_sockaddr(), &sa.addr_length, flags);
136 throw_system_error_on(ret == -1, "accept4");
137 return file_desc(ret);
138 }
f67539c2 139 static file_desc inotify_init(int flags);
9f95a23c 140 // return nullopt if no connection is availbale to be accepted
f67539c2 141 std::optional<file_desc> try_accept(socket_address& sa, int flags = 0) {
9f95a23c
TL
142 auto ret = ::accept4(_fd, &sa.as_posix_sockaddr(), &sa.addr_length, flags);
143 if (ret == -1 && errno == EAGAIN) {
144 return {};
145 }
11fdf7f2
TL
146 throw_system_error_on(ret == -1, "accept4");
147 return file_desc(ret);
148 }
149 void shutdown(int how) {
150 auto ret = ::shutdown(_fd, how);
151 if (ret == -1 && errno != ENOTCONN) {
152 throw_system_error_on(ret == -1, "shutdown");
153 }
154 }
155 void truncate(size_t size) {
156 auto ret = ::ftruncate(_fd, size);
157 throw_system_error_on(ret, "ftruncate");
158 }
159 int ioctl(int request) {
160 return ioctl(request, 0);
161 }
162 int ioctl(int request, int value) {
163 int r = ::ioctl(_fd, request, value);
164 throw_system_error_on(r == -1, "ioctl");
165 return r;
166 }
167 int ioctl(int request, unsigned int value) {
168 int r = ::ioctl(_fd, request, value);
169 throw_system_error_on(r == -1, "ioctl");
170 return r;
171 }
172 template <class X>
173 int ioctl(int request, X& data) {
174 int r = ::ioctl(_fd, request, &data);
175 throw_system_error_on(r == -1, "ioctl");
176 return r;
177 }
178 template <class X>
179 int ioctl(int request, X&& data) {
180 int r = ::ioctl(_fd, request, &data);
181 throw_system_error_on(r == -1, "ioctl");
182 return r;
183 }
184 template <class X>
185 int setsockopt(int level, int optname, X&& data) {
186 int r = ::setsockopt(_fd, level, optname, &data, sizeof(data));
187 throw_system_error_on(r == -1, "setsockopt");
188 return r;
189 }
190 int setsockopt(int level, int optname, const char* data) {
191 int r = ::setsockopt(_fd, level, optname, data, strlen(data) + 1);
192 throw_system_error_on(r == -1, "setsockopt");
193 return r;
194 }
f67539c2
TL
195 int setsockopt(int level, int optname, const void* data, socklen_t len) {
196 int r = ::setsockopt(_fd, level, optname, data, len);
197 throw_system_error_on(r == -1, "setsockopt");
198 return r;
199 }
11fdf7f2
TL
200 template <typename Data>
201 Data getsockopt(int level, int optname) {
202 Data data;
203 socklen_t len = sizeof(data);
204 memset(&data, 0, len);
205 int r = ::getsockopt(_fd, level, optname, &data, &len);
206 throw_system_error_on(r == -1, "getsockopt");
207 return data;
208 }
209 int getsockopt(int level, int optname, char* data, socklen_t len) {
210 int r = ::getsockopt(_fd, level, optname, data, &len);
211 throw_system_error_on(r == -1, "getsockopt");
212 return r;
213 }
214 size_t size() {
215 struct stat buf;
216 auto r = ::fstat(_fd, &buf);
217 throw_system_error_on(r == -1, "fstat");
218 return buf.st_size;
219 }
f67539c2 220 std::optional<size_t> read(void* buffer, size_t len) {
11fdf7f2
TL
221 auto r = ::read(_fd, buffer, len);
222 if (r == -1 && errno == EAGAIN) {
223 return {};
224 }
225 throw_system_error_on(r == -1, "read");
226 return { size_t(r) };
227 }
f67539c2 228 std::optional<ssize_t> recv(void* buffer, size_t len, int flags) {
11fdf7f2
TL
229 auto r = ::recv(_fd, buffer, len, flags);
230 if (r == -1 && errno == EAGAIN) {
231 return {};
232 }
233 throw_system_error_on(r == -1, "recv");
234 return { ssize_t(r) };
235 }
f67539c2 236 std::optional<size_t> recvmsg(msghdr* mh, int flags) {
11fdf7f2
TL
237 auto r = ::recvmsg(_fd, mh, flags);
238 if (r == -1 && errno == EAGAIN) {
239 return {};
240 }
241 throw_system_error_on(r == -1, "recvmsg");
242 return { size_t(r) };
243 }
f67539c2 244 std::optional<size_t> send(const void* buffer, size_t len, int flags) {
11fdf7f2
TL
245 auto r = ::send(_fd, buffer, len, flags);
246 if (r == -1 && errno == EAGAIN) {
247 return {};
248 }
249 throw_system_error_on(r == -1, "send");
250 return { size_t(r) };
251 }
f67539c2 252 std::optional<size_t> sendto(socket_address& addr, const void* buf, size_t len, int flags) {
9f95a23c 253 auto r = ::sendto(_fd, buf, len, flags, &addr.u.sa, addr.length());
11fdf7f2
TL
254 if (r == -1 && errno == EAGAIN) {
255 return {};
256 }
257 throw_system_error_on(r == -1, "sendto");
258 return { size_t(r) };
259 }
f67539c2 260 std::optional<size_t> sendmsg(const msghdr* msg, int flags) {
11fdf7f2
TL
261 auto r = ::sendmsg(_fd, msg, flags);
262 if (r == -1 && errno == EAGAIN) {
263 return {};
264 }
265 throw_system_error_on(r == -1, "sendmsg");
266 return { size_t(r) };
267 }
268 void bind(sockaddr& sa, socklen_t sl) {
269 auto r = ::bind(_fd, &sa, sl);
270 throw_system_error_on(r == -1, "bind");
271 }
272 void connect(sockaddr& sa, socklen_t sl) {
273 auto r = ::connect(_fd, &sa, sl);
274 if (r == -1 && errno == EINPROGRESS) {
275 return;
276 }
277 throw_system_error_on(r == -1, "connect");
278 }
279 socket_address get_address() {
280 socket_address addr;
9f95a23c 281 auto r = ::getsockname(_fd, &addr.u.sa, &addr.addr_length);
11fdf7f2
TL
282 throw_system_error_on(r == -1, "getsockname");
283 return addr;
284 }
285 void listen(int backlog) {
286 auto fd = ::listen(_fd, backlog);
287 throw_system_error_on(fd == -1, "listen");
288 }
f67539c2 289 std::optional<size_t> write(const void* buf, size_t len) {
11fdf7f2
TL
290 auto r = ::write(_fd, buf, len);
291 if (r == -1 && errno == EAGAIN) {
292 return {};
293 }
294 throw_system_error_on(r == -1, "write");
295 return { size_t(r) };
296 }
f67539c2 297 std::optional<size_t> writev(const iovec *iov, int iovcnt) {
11fdf7f2
TL
298 auto r = ::writev(_fd, iov, iovcnt);
299 if (r == -1 && errno == EAGAIN) {
300 return {};
301 }
302 throw_system_error_on(r == -1, "writev");
303 return { size_t(r) };
304 }
305 size_t pread(void* buf, size_t len, off_t off) {
306 auto r = ::pread(_fd, buf, len, off);
307 throw_system_error_on(r == -1, "pread");
308 return size_t(r);
309 }
310 void timerfd_settime(int flags, const itimerspec& its) {
311 auto r = ::timerfd_settime(_fd, flags, &its, NULL);
312 throw_system_error_on(r == -1, "timerfd_settime");
313 }
314
315 mmap_area map(size_t size, unsigned prot, unsigned flags, size_t offset,
316 void* addr = nullptr) {
317 void *x = mmap(addr, size, prot, flags, _fd, offset);
318 throw_system_error_on(x == MAP_FAILED, "mmap");
319 return mmap_area(static_cast<char*>(x), mmap_deleter{size});
320 }
321
322 mmap_area map_shared_rw(size_t size, size_t offset) {
323 return map(size, PROT_READ | PROT_WRITE, MAP_SHARED, offset);
324 }
325
326 mmap_area map_shared_ro(size_t size, size_t offset) {
327 return map(size, PROT_READ, MAP_SHARED, offset);
328 }
329
330 mmap_area map_private_rw(size_t size, size_t offset) {
331 return map(size, PROT_READ | PROT_WRITE, MAP_PRIVATE, offset);
332 }
333
334 mmap_area map_private_ro(size_t size, size_t offset) {
335 return map(size, PROT_READ, MAP_PRIVATE, offset);
336 }
337
338private:
339 file_desc(int fd) : _fd(fd) {}
340 };
341
342
343namespace posix {
344
345/// Converts a duration value to a `timespec`
346///
347/// \param d a duration value to convert to the POSIX `timespec` format
348/// \return `d` as a `timespec` value
349template <typename Rep, typename Period>
350struct timespec
351to_timespec(std::chrono::duration<Rep, Period> d) {
352 auto ns = std::chrono::duration_cast<std::chrono::nanoseconds>(d).count();
353 struct timespec ts {};
354 ts.tv_sec = ns / 1000000000;
355 ts.tv_nsec = ns % 1000000000;
356 return ts;
357}
358
359/// Converts a relative start time and an interval to an `itimerspec`
360///
361/// \param base First expiration of the timer, relative to the current time
362/// \param interval period for re-arming the timer
363/// \return `base` and `interval` converted to an `itimerspec`
364template <typename Rep1, typename Period1, typename Rep2, typename Period2>
365struct itimerspec
366to_relative_itimerspec(std::chrono::duration<Rep1, Period1> base, std::chrono::duration<Rep2, Period2> interval) {
367 struct itimerspec its {};
368 its.it_interval = to_timespec(interval);
369 its.it_value = to_timespec(base);
370 return its;
371}
372
373
374/// Converts a time_point and a duration to an `itimerspec`
375///
376/// \param base base time for the timer; must use the same clock as the timer
377/// \param interval period for re-arming the timer
378/// \return `base` and `interval` converted to an `itimerspec`
379template <typename Clock, class Duration, class Rep, class Period>
380struct itimerspec
381to_absolute_itimerspec(std::chrono::time_point<Clock, Duration> base, std::chrono::duration<Rep, Period> interval) {
382 return to_relative_itimerspec(base.time_since_epoch(), interval);
383}
384
385}
386
387class posix_thread {
388public:
389 class attr;
390private:
391 // must allocate, since this class is moveable
392 std::unique_ptr<std::function<void ()>> _func;
393 pthread_t _pthread;
394 bool _valid = true;
395 mmap_area _stack;
396private:
397 static void* start_routine(void* arg) noexcept;
398public:
399 posix_thread(std::function<void ()> func);
400 posix_thread(attr a, std::function<void ()> func);
401 posix_thread(posix_thread&& x);
402 ~posix_thread();
403 void join();
404public:
405 class attr {
406 public:
407 struct stack_size { size_t size = 0; };
408 attr() = default;
409 template <typename... A>
410 attr(A... a) {
411 set(std::forward<A>(a)...);
412 }
413 void set() {}
414 template <typename A, typename... Rest>
415 void set(A a, Rest... rest) {
416 set(std::forward<A>(a));
417 set(std::forward<Rest>(rest)...);
418 }
419 void set(stack_size ss) { _stack_size = ss; }
420 private:
421 stack_size _stack_size;
422 friend class posix_thread;
423 };
424};
425
426
427inline
428void throw_system_error_on(bool condition, const char* what_arg) {
429 if (condition) {
9f95a23c
TL
430 if ((errno == EBADF || errno == ENOTSOCK) && is_abort_on_ebadf_enabled()) {
431 abort();
432 }
11fdf7f2
TL
433 throw std::system_error(errno, std::system_category(), what_arg);
434 }
435}
436
437template <typename T>
438inline
439void throw_kernel_error(T r) {
440 static_assert(std::is_signed<T>::value, "kernel error variables must be signed");
441 if (r < 0) {
9f95a23c
TL
442 auto ec = -r;
443 if ((ec == EBADF || ec == ENOTSOCK) && is_abort_on_ebadf_enabled()) {
444 abort();
445 }
11fdf7f2
TL
446 throw std::system_error(-r, std::system_category());
447 }
448}
449
450template <typename T>
451inline
452void throw_pthread_error(T r) {
453 if (r != 0) {
454 throw std::system_error(r, std::system_category());
455 }
456}
457
458inline
459sigset_t make_sigset_mask(int signo) {
460 sigset_t set;
461 sigemptyset(&set);
462 sigaddset(&set, signo);
463 return set;
464}
465
466inline
467sigset_t make_full_sigset_mask() {
468 sigset_t set;
469 sigfillset(&set);
470 return set;
471}
472
473inline
474sigset_t make_empty_sigset_mask() {
475 sigset_t set;
476 sigemptyset(&set);
477 return set;
478}
479
480inline
481void pin_this_thread(unsigned cpu_id) {
482 cpu_set_t cs;
483 CPU_ZERO(&cs);
484 CPU_SET(cpu_id, &cs);
485 auto r = pthread_setaffinity_np(pthread_self(), sizeof(cs), &cs);
486 assert(r == 0);
487 (void)r;
488}
489
490/// @}
491
492}