]> git.proxmox.com Git - ceph.git/blame - ceph/src/seastar/include/seastar/core/posix.hh
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / seastar / include / seastar / core / posix.hh
CommitLineData
11fdf7f2
TL
1/*
2 * This file is open source software, licensed to you under the terms
3 * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
4 * distributed with this work for additional information regarding copyright
5 * ownership. You may not use this file except in compliance with the License.
6 *
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing,
12 * software distributed under the License is distributed on an
13 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 * KIND, either express or implied. See the License for the
15 * specific language governing permissions and limitations
16 * under the License.
17 */
18/*
19 * Copyright (C) 2016 ScyllaDB
20 */
21
22#pragma once
23
1e59de90 24#include <set>
11fdf7f2 25#include <seastar/core/sstring.hh>
9f95a23c 26#include "abort_on_ebadf.hh"
11fdf7f2
TL
27#include <sys/types.h>
28#include <sys/stat.h>
29#include <unistd.h>
30#include <assert.h>
31#include <utility>
32#include <fcntl.h>
33#include <sys/ioctl.h>
34#include <sys/eventfd.h>
35#include <sys/timerfd.h>
36#include <sys/socket.h>
37#include <sys/epoll.h>
38#include <sys/mman.h>
39#include <signal.h>
40#include <system_error>
11fdf7f2
TL
41#include <pthread.h>
42#include <signal.h>
1e59de90 43#include <spawn.h>
11fdf7f2
TL
44#include <memory>
45#include <chrono>
46#include <sys/uio.h>
47
48#include <seastar/net/socket_defs.hh>
9f95a23c 49#include <seastar/util/std-compat.hh>
11fdf7f2
TL
50
51namespace seastar {
52
53/// \file
54/// \defgroup posix-support POSIX Support
55///
56/// Mostly-internal APIs to provide C++ glue for the underlying POSIX platform;
57/// but can be used by the application when they don't block.
58///
59/// \addtogroup posix-support
60/// @{
61
62inline void throw_system_error_on(bool condition, const char* what_arg = "");
63
64template <typename T>
65inline void throw_kernel_error(T r);
66
1e59de90
TL
67template <typename T>
68inline void throw_pthread_error(T r);
69
11fdf7f2
TL
/// Deleter type used by \ref mmap_area.
///
/// `_size` holds a byte count supplied when the deleter is constructed
/// (file_desc::map() passes the length of the mapping). The call operator
/// is only declared here; its definition lives outside this header.
struct mmap_deleter {
    size_t _size;
    void operator()(void* ptr) const;
};
74
75using mmap_area = std::unique_ptr<char[], mmap_deleter>;
76
77mmap_area mmap_anonymous(void* addr, size_t length, int prot, int flags);
78
79class file_desc {
80 int _fd;
81public:
82 file_desc() = delete;
83 file_desc(const file_desc&) = delete;
f67539c2 84 file_desc(file_desc&& x) noexcept : _fd(x._fd) { x._fd = -1; }
11fdf7f2
TL
85 ~file_desc() { if (_fd != -1) { ::close(_fd); } }
86 void operator=(const file_desc&) = delete;
87 file_desc& operator=(file_desc&& x) {
88 if (this != &x) {
89 std::swap(_fd, x._fd);
90 if (x._fd != -1) {
91 x.close();
92 }
93 }
94 return *this;
95 }
96 void close() {
97 assert(_fd != -1);
98 auto r = ::close(_fd);
99 throw_system_error_on(r == -1, "close");
100 _fd = -1;
101 }
102 int get() const { return _fd; }
9f95a23c 103
1e59de90
TL
104 sstring fdinfo() const noexcept;
105
9f95a23c
TL
106 static file_desc from_fd(int fd) {
107 return file_desc(fd);
108 }
109
11fdf7f2
TL
110 static file_desc open(sstring name, int flags, mode_t mode = 0) {
111 int fd = ::open(name.c_str(), flags, mode);
112 throw_system_error_on(fd == -1, "open");
113 return file_desc(fd);
114 }
115 static file_desc socket(int family, int type, int protocol = 0) {
116 int fd = ::socket(family, type, protocol);
117 throw_system_error_on(fd == -1, "socket");
118 return file_desc(fd);
119 }
120 static file_desc eventfd(unsigned initval, int flags) {
121 int fd = ::eventfd(initval, flags);
122 throw_system_error_on(fd == -1, "eventfd");
123 return file_desc(fd);
124 }
125 static file_desc epoll_create(int flags = 0) {
126 int fd = ::epoll_create1(flags);
127 throw_system_error_on(fd == -1, "epoll_create1");
128 return file_desc(fd);
129 }
130 static file_desc timerfd_create(int clockid, int flags) {
131 int fd = ::timerfd_create(clockid, flags);
132 throw_system_error_on(fd == -1, "timerfd_create");
133 return file_desc(fd);
134 }
135 static file_desc temporary(sstring directory);
136 file_desc dup() const {
137 int fd = ::dup(get());
138 throw_system_error_on(fd == -1, "dup");
139 return file_desc(fd);
140 }
9f95a23c
TL
141 file_desc accept(socket_address& sa, int flags = 0) {
142 auto ret = ::accept4(_fd, &sa.as_posix_sockaddr(), &sa.addr_length, flags);
143 throw_system_error_on(ret == -1, "accept4");
144 return file_desc(ret);
145 }
f67539c2 146 static file_desc inotify_init(int flags);
9f95a23c 147 // return nullopt if no connection is availbale to be accepted
f67539c2 148 std::optional<file_desc> try_accept(socket_address& sa, int flags = 0) {
9f95a23c
TL
149 auto ret = ::accept4(_fd, &sa.as_posix_sockaddr(), &sa.addr_length, flags);
150 if (ret == -1 && errno == EAGAIN) {
151 return {};
152 }
11fdf7f2
TL
153 throw_system_error_on(ret == -1, "accept4");
154 return file_desc(ret);
155 }
156 void shutdown(int how) {
157 auto ret = ::shutdown(_fd, how);
158 if (ret == -1 && errno != ENOTCONN) {
159 throw_system_error_on(ret == -1, "shutdown");
160 }
161 }
162 void truncate(size_t size) {
163 auto ret = ::ftruncate(_fd, size);
164 throw_system_error_on(ret, "ftruncate");
165 }
166 int ioctl(int request) {
167 return ioctl(request, 0);
168 }
169 int ioctl(int request, int value) {
170 int r = ::ioctl(_fd, request, value);
171 throw_system_error_on(r == -1, "ioctl");
172 return r;
173 }
174 int ioctl(int request, unsigned int value) {
175 int r = ::ioctl(_fd, request, value);
176 throw_system_error_on(r == -1, "ioctl");
177 return r;
178 }
179 template <class X>
180 int ioctl(int request, X& data) {
181 int r = ::ioctl(_fd, request, &data);
182 throw_system_error_on(r == -1, "ioctl");
183 return r;
184 }
185 template <class X>
186 int ioctl(int request, X&& data) {
187 int r = ::ioctl(_fd, request, &data);
188 throw_system_error_on(r == -1, "ioctl");
189 return r;
190 }
191 template <class X>
192 int setsockopt(int level, int optname, X&& data) {
193 int r = ::setsockopt(_fd, level, optname, &data, sizeof(data));
194 throw_system_error_on(r == -1, "setsockopt");
195 return r;
196 }
197 int setsockopt(int level, int optname, const char* data) {
198 int r = ::setsockopt(_fd, level, optname, data, strlen(data) + 1);
199 throw_system_error_on(r == -1, "setsockopt");
200 return r;
201 }
f67539c2
TL
202 int setsockopt(int level, int optname, const void* data, socklen_t len) {
203 int r = ::setsockopt(_fd, level, optname, data, len);
204 throw_system_error_on(r == -1, "setsockopt");
205 return r;
206 }
11fdf7f2
TL
207 template <typename Data>
208 Data getsockopt(int level, int optname) {
209 Data data;
210 socklen_t len = sizeof(data);
211 memset(&data, 0, len);
212 int r = ::getsockopt(_fd, level, optname, &data, &len);
213 throw_system_error_on(r == -1, "getsockopt");
214 return data;
215 }
216 int getsockopt(int level, int optname, char* data, socklen_t len) {
217 int r = ::getsockopt(_fd, level, optname, data, &len);
218 throw_system_error_on(r == -1, "getsockopt");
219 return r;
220 }
221 size_t size() {
222 struct stat buf;
223 auto r = ::fstat(_fd, &buf);
224 throw_system_error_on(r == -1, "fstat");
225 return buf.st_size;
226 }
f67539c2 227 std::optional<size_t> read(void* buffer, size_t len) {
11fdf7f2
TL
228 auto r = ::read(_fd, buffer, len);
229 if (r == -1 && errno == EAGAIN) {
230 return {};
231 }
232 throw_system_error_on(r == -1, "read");
233 return { size_t(r) };
234 }
f67539c2 235 std::optional<ssize_t> recv(void* buffer, size_t len, int flags) {
11fdf7f2
TL
236 auto r = ::recv(_fd, buffer, len, flags);
237 if (r == -1 && errno == EAGAIN) {
238 return {};
239 }
240 throw_system_error_on(r == -1, "recv");
241 return { ssize_t(r) };
242 }
f67539c2 243 std::optional<size_t> recvmsg(msghdr* mh, int flags) {
11fdf7f2
TL
244 auto r = ::recvmsg(_fd, mh, flags);
245 if (r == -1 && errno == EAGAIN) {
246 return {};
247 }
248 throw_system_error_on(r == -1, "recvmsg");
249 return { size_t(r) };
250 }
f67539c2 251 std::optional<size_t> send(const void* buffer, size_t len, int flags) {
11fdf7f2
TL
252 auto r = ::send(_fd, buffer, len, flags);
253 if (r == -1 && errno == EAGAIN) {
254 return {};
255 }
256 throw_system_error_on(r == -1, "send");
257 return { size_t(r) };
258 }
f67539c2 259 std::optional<size_t> sendto(socket_address& addr, const void* buf, size_t len, int flags) {
9f95a23c 260 auto r = ::sendto(_fd, buf, len, flags, &addr.u.sa, addr.length());
11fdf7f2
TL
261 if (r == -1 && errno == EAGAIN) {
262 return {};
263 }
264 throw_system_error_on(r == -1, "sendto");
265 return { size_t(r) };
266 }
f67539c2 267 std::optional<size_t> sendmsg(const msghdr* msg, int flags) {
11fdf7f2
TL
268 auto r = ::sendmsg(_fd, msg, flags);
269 if (r == -1 && errno == EAGAIN) {
270 return {};
271 }
272 throw_system_error_on(r == -1, "sendmsg");
273 return { size_t(r) };
274 }
275 void bind(sockaddr& sa, socklen_t sl) {
276 auto r = ::bind(_fd, &sa, sl);
277 throw_system_error_on(r == -1, "bind");
278 }
279 void connect(sockaddr& sa, socklen_t sl) {
280 auto r = ::connect(_fd, &sa, sl);
281 if (r == -1 && errno == EINPROGRESS) {
282 return;
283 }
284 throw_system_error_on(r == -1, "connect");
285 }
286 socket_address get_address() {
287 socket_address addr;
9f95a23c 288 auto r = ::getsockname(_fd, &addr.u.sa, &addr.addr_length);
11fdf7f2
TL
289 throw_system_error_on(r == -1, "getsockname");
290 return addr;
291 }
292 void listen(int backlog) {
293 auto fd = ::listen(_fd, backlog);
294 throw_system_error_on(fd == -1, "listen");
295 }
f67539c2 296 std::optional<size_t> write(const void* buf, size_t len) {
11fdf7f2
TL
297 auto r = ::write(_fd, buf, len);
298 if (r == -1 && errno == EAGAIN) {
299 return {};
300 }
301 throw_system_error_on(r == -1, "write");
302 return { size_t(r) };
303 }
f67539c2 304 std::optional<size_t> writev(const iovec *iov, int iovcnt) {
11fdf7f2
TL
305 auto r = ::writev(_fd, iov, iovcnt);
306 if (r == -1 && errno == EAGAIN) {
307 return {};
308 }
309 throw_system_error_on(r == -1, "writev");
310 return { size_t(r) };
311 }
312 size_t pread(void* buf, size_t len, off_t off) {
313 auto r = ::pread(_fd, buf, len, off);
314 throw_system_error_on(r == -1, "pread");
315 return size_t(r);
316 }
317 void timerfd_settime(int flags, const itimerspec& its) {
318 auto r = ::timerfd_settime(_fd, flags, &its, NULL);
319 throw_system_error_on(r == -1, "timerfd_settime");
320 }
321
322 mmap_area map(size_t size, unsigned prot, unsigned flags, size_t offset,
323 void* addr = nullptr) {
324 void *x = mmap(addr, size, prot, flags, _fd, offset);
325 throw_system_error_on(x == MAP_FAILED, "mmap");
326 return mmap_area(static_cast<char*>(x), mmap_deleter{size});
327 }
328
329 mmap_area map_shared_rw(size_t size, size_t offset) {
330 return map(size, PROT_READ | PROT_WRITE, MAP_SHARED, offset);
331 }
332
333 mmap_area map_shared_ro(size_t size, size_t offset) {
334 return map(size, PROT_READ, MAP_SHARED, offset);
335 }
336
337 mmap_area map_private_rw(size_t size, size_t offset) {
338 return map(size, PROT_READ | PROT_WRITE, MAP_PRIVATE, offset);
339 }
340
341 mmap_area map_private_ro(size_t size, size_t offset) {
342 return map(size, PROT_READ, MAP_PRIVATE, offset);
343 }
344
1e59de90
TL
345 void spawn_actions_add_close(posix_spawn_file_actions_t* actions) {
346 auto r = ::posix_spawn_file_actions_addclose(actions, _fd);
347 throw_pthread_error(r);
348 }
349
350 void spawn_actions_add_dup2(posix_spawn_file_actions_t* actions, int newfd) {
351 auto r = ::posix_spawn_file_actions_adddup2(actions, _fd, newfd);
352 throw_pthread_error(r);
353 }
11fdf7f2
TL
354private:
355 file_desc(int fd) : _fd(fd) {}
356 };
357
358
359namespace posix {
360
1e59de90
TL
/// Bit recording that the receive side has been shut down.
static constexpr unsigned rcv_shutdown = 0x1;
/// Bit recording that the send side has been shut down.
static constexpr unsigned snd_shutdown = 0x2;
/// Converts a `how` value into a combination of the bits above by adding
/// one: 0 -> rcv_shutdown, 1 -> snd_shutdown, 2 -> both. Presumably meant to
/// be fed the SHUT_RD / SHUT_WR / SHUT_RDWR constants -- confirm with callers.
static inline constexpr unsigned shutdown_mask(int how) {
    return static_cast<unsigned>(how + 1);
}
364
11fdf7f2
TL
/// Converts a duration value to a `timespec`
///
/// The duration is first converted to whole nanoseconds. The result is
/// normalized so that `tv_nsec` always lies in `[0, 999999999]`, also for
/// negative durations (e.g. -1.5s becomes `{-2, 500000000}`).
///
/// \param d a duration value to convert to the POSIX `timespec` format
/// \return `d` as a `timespec` value
template <typename Rep, typename Period>
struct timespec
to_timespec(std::chrono::duration<Rep, Period> d) {
    constexpr long long ns_per_sec = 1000000000;
    auto ns = std::chrono::duration_cast<std::chrono::nanoseconds>(d).count();
    auto sec = ns / ns_per_sec;
    auto nsec = ns % ns_per_sec;
    // Integer division truncates toward zero, which leaves a negative
    // remainder for negative inputs; borrow one second to bring it back
    // into the valid range.
    if (nsec < 0) {
        nsec += ns_per_sec;
        --sec;
    }
    struct timespec ts {};
    ts.tv_sec = sec;
    ts.tv_nsec = nsec;
    return ts;
}
378
/// Builds an `itimerspec` from a relative first-expiration time and a period
///
/// Both arguments are converted with to_timespec(); `base` ends up in
/// `it_value` and `interval` in `it_interval`.
///
/// \param base First expiration of the timer, relative to the current time
/// \param interval period for re-arming the timer
/// \return `base` and `interval` converted to an `itimerspec`
template <typename Rep1, typename Period1, typename Rep2, typename Period2>
struct itimerspec
to_relative_itimerspec(std::chrono::duration<Rep1, Period1> base, std::chrono::duration<Rep2, Period2> interval) {
    struct itimerspec spec {};
    spec.it_value = to_timespec(base);
    spec.it_interval = to_timespec(interval);
    return spec;
}
392
393
/// Builds an `itimerspec` from an absolute time_point and a period
///
/// The time_point is expressed as its offset from the clock's epoch and
/// then handled exactly like a relative timer specification.
///
/// \param base base time for the timer; must use the same clock as the timer
/// \param interval period for re-arming the timer
/// \return `base` and `interval` converted to an `itimerspec`
template <typename Clock, class Duration, class Rep, class Period>
struct itimerspec
to_absolute_itimerspec(std::chrono::time_point<Clock, Duration> base, std::chrono::duration<Rep, Period> interval) {
    const auto since_epoch = base.time_since_epoch();
    return to_relative_itimerspec(since_epoch, interval);
}
404
405}
406
407class posix_thread {
408public:
409 class attr;
410private:
411 // must allocate, since this class is moveable
412 std::unique_ptr<std::function<void ()>> _func;
413 pthread_t _pthread;
414 bool _valid = true;
415 mmap_area _stack;
416private:
417 static void* start_routine(void* arg) noexcept;
418public:
419 posix_thread(std::function<void ()> func);
420 posix_thread(attr a, std::function<void ()> func);
421 posix_thread(posix_thread&& x);
422 ~posix_thread();
423 void join();
424public:
425 class attr {
426 public:
427 struct stack_size { size_t size = 0; };
428 attr() = default;
429 template <typename... A>
430 attr(A... a) {
431 set(std::forward<A>(a)...);
432 }
433 void set() {}
434 template <typename A, typename... Rest>
435 void set(A a, Rest... rest) {
436 set(std::forward<A>(a));
437 set(std::forward<Rest>(rest)...);
438 }
439 void set(stack_size ss) { _stack_size = ss; }
440 private:
441 stack_size _stack_size;
442 friend class posix_thread;
443 };
444};
445
446
447inline
448void throw_system_error_on(bool condition, const char* what_arg) {
449 if (condition) {
9f95a23c
TL
450 if ((errno == EBADF || errno == ENOTSOCK) && is_abort_on_ebadf_enabled()) {
451 abort();
452 }
11fdf7f2
TL
453 throw std::system_error(errno, std::system_category(), what_arg);
454 }
455}
456
457template <typename T>
458inline
459void throw_kernel_error(T r) {
460 static_assert(std::is_signed<T>::value, "kernel error variables must be signed");
461 if (r < 0) {
9f95a23c
TL
462 auto ec = -r;
463 if ((ec == EBADF || ec == ENOTSOCK) && is_abort_on_ebadf_enabled()) {
464 abort();
465 }
11fdf7f2
TL
466 throw std::system_error(-r, std::system_category());
467 }
468}
469
/// Throws a std::system_error for a non-zero return value, using the value
/// itself as the error number; zero is treated as success.
///
/// \param r the value to check
template <typename T>
inline
void throw_pthread_error(T r) {
    if (r == 0) {
        return;
    }
    throw std::system_error(r, std::system_category());
}
477
/// Returns a signal set that contains exactly \c signo.
inline
sigset_t make_sigset_mask(int signo) {
    sigset_t mask;
    sigemptyset(&mask);
    sigaddset(&mask, signo);
    return mask;
}
485
/// Returns a signal set initialized with sigfillset().
inline
sigset_t make_full_sigset_mask() {
    sigset_t mask;
    sigfillset(&mask);
    return mask;
}
492
/// Returns a signal set initialized with sigemptyset().
inline
sigset_t make_empty_sigset_mask() {
    sigset_t mask;
    sigemptyset(&mask);
    return mask;
}
499
/// Restricts the calling thread's CPU affinity to the single CPU
/// \c cpu_id.
///
/// Failure of pthread_setaffinity_np() is only checked by an assert, so it
/// goes unnoticed when asserts are compiled out.
inline
void pin_this_thread(unsigned cpu_id) {
    cpu_set_t affinity;
    CPU_ZERO(&affinity);
    CPU_SET(cpu_id, &affinity);
    auto rc = pthread_setaffinity_np(pthread_self(), sizeof(affinity), &affinity);
    assert(rc == 0);
    (void)rc;  // silences the unused-variable warning when asserts are disabled
}
509
1e59de90
TL
510std::set<unsigned> get_current_cpuset();
511
11fdf7f2
TL
512/// @}
513
514}