]>
Commit | Line | Data |
---|---|---|
11fdf7f2 TL |
1 | /* |
2 | * This file is open source software, licensed to you under the terms | |
3 | * of the Apache License, Version 2.0 (the "License"). See the NOTICE file | |
4 | * distributed with this work for additional information regarding copyright | |
5 | * ownership. You may not use this file except in compliance with the License. | |
6 | * | |
7 | * You may obtain a copy of the License at | |
8 | * | |
9 | * http://www.apache.org/licenses/LICENSE-2.0 | |
10 | * | |
11 | * Unless required by applicable law or agreed to in writing, | |
12 | * software distributed under the License is distributed on an | |
13 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
14 | * KIND, either express or implied. See the License for the | |
15 | * specific language governing permissions and limitations | |
16 | * under the License. | |
17 | */ | |
18 | /* | |
19 | * Copyright (C) 2016 ScyllaDB | |
20 | */ | |
21 | ||
22 | #pragma once | |
23 | ||
1e59de90 | 24 | #include <set> |
11fdf7f2 | 25 | #include <seastar/core/sstring.hh> |
9f95a23c | 26 | #include "abort_on_ebadf.hh" |
11fdf7f2 TL |
27 | #include <sys/types.h> |
28 | #include <sys/stat.h> | |
29 | #include <unistd.h> | |
30 | #include <assert.h> | |
31 | #include <utility> | |
32 | #include <fcntl.h> | |
33 | #include <sys/ioctl.h> | |
34 | #include <sys/eventfd.h> | |
35 | #include <sys/timerfd.h> | |
36 | #include <sys/socket.h> | |
37 | #include <sys/epoll.h> | |
38 | #include <sys/mman.h> | |
39 | #include <signal.h> | |
40 | #include <system_error> | |
11fdf7f2 TL |
41 | #include <pthread.h> |
42 | #include <signal.h> | |
1e59de90 | 43 | #include <spawn.h> |
11fdf7f2 TL |
44 | #include <memory> |
45 | #include <chrono> | |
46 | #include <sys/uio.h> | |
47 | ||
48 | #include <seastar/net/socket_defs.hh> | |
9f95a23c | 49 | #include <seastar/util/std-compat.hh> |
11fdf7f2 TL |
50 | |
51 | namespace seastar { | |
52 | ||
53 | /// \file | |
54 | /// \defgroup posix-support POSIX Support | |
55 | /// | |
56 | /// Mostly-internal APIs to provide C++ glue for the underlying POSIX platform; | |
57 | /// but can be used by the application when they don't block. | |
58 | /// | |
59 | /// \addtogroup posix-support | |
60 | /// @{ | |
61 | ||
62 | inline void throw_system_error_on(bool condition, const char* what_arg = ""); | |
63 | ||
64 | template <typename T> | |
65 | inline void throw_kernel_error(T r); | |
66 | ||
1e59de90 TL |
67 | template <typename T> |
68 | inline void throw_pthread_error(T r); | |
69 | ||
11fdf7f2 TL |
/// Deleter type used by \ref mmap_area (a `std::unique_ptr<char[], mmap_deleter>`).
///
/// The call operator is only declared here; its definition lives elsewhere.
/// NOTE(review): presumably it releases the mapping with `munmap(ptr, _size)` --
/// confirm against the out-of-line definition.
struct mmap_deleter {
    /// Size value handed to the deleter when the area is created
    /// (file_desc::map() stores the length of the mapping here).
    size_t _size;
    void operator()(void* ptr) const;
};
74 | ||
75 | using mmap_area = std::unique_ptr<char[], mmap_deleter>; | |
76 | ||
77 | mmap_area mmap_anonymous(void* addr, size_t length, int prot, int flags); | |
78 | ||
79 | class file_desc { | |
80 | int _fd; | |
81 | public: | |
82 | file_desc() = delete; | |
83 | file_desc(const file_desc&) = delete; | |
f67539c2 | 84 | file_desc(file_desc&& x) noexcept : _fd(x._fd) { x._fd = -1; } |
11fdf7f2 TL |
85 | ~file_desc() { if (_fd != -1) { ::close(_fd); } } |
86 | void operator=(const file_desc&) = delete; | |
87 | file_desc& operator=(file_desc&& x) { | |
88 | if (this != &x) { | |
89 | std::swap(_fd, x._fd); | |
90 | if (x._fd != -1) { | |
91 | x.close(); | |
92 | } | |
93 | } | |
94 | return *this; | |
95 | } | |
96 | void close() { | |
97 | assert(_fd != -1); | |
98 | auto r = ::close(_fd); | |
99 | throw_system_error_on(r == -1, "close"); | |
100 | _fd = -1; | |
101 | } | |
102 | int get() const { return _fd; } | |
9f95a23c | 103 | |
1e59de90 TL |
104 | sstring fdinfo() const noexcept; |
105 | ||
9f95a23c TL |
106 | static file_desc from_fd(int fd) { |
107 | return file_desc(fd); | |
108 | } | |
109 | ||
11fdf7f2 TL |
110 | static file_desc open(sstring name, int flags, mode_t mode = 0) { |
111 | int fd = ::open(name.c_str(), flags, mode); | |
112 | throw_system_error_on(fd == -1, "open"); | |
113 | return file_desc(fd); | |
114 | } | |
115 | static file_desc socket(int family, int type, int protocol = 0) { | |
116 | int fd = ::socket(family, type, protocol); | |
117 | throw_system_error_on(fd == -1, "socket"); | |
118 | return file_desc(fd); | |
119 | } | |
120 | static file_desc eventfd(unsigned initval, int flags) { | |
121 | int fd = ::eventfd(initval, flags); | |
122 | throw_system_error_on(fd == -1, "eventfd"); | |
123 | return file_desc(fd); | |
124 | } | |
125 | static file_desc epoll_create(int flags = 0) { | |
126 | int fd = ::epoll_create1(flags); | |
127 | throw_system_error_on(fd == -1, "epoll_create1"); | |
128 | return file_desc(fd); | |
129 | } | |
130 | static file_desc timerfd_create(int clockid, int flags) { | |
131 | int fd = ::timerfd_create(clockid, flags); | |
132 | throw_system_error_on(fd == -1, "timerfd_create"); | |
133 | return file_desc(fd); | |
134 | } | |
135 | static file_desc temporary(sstring directory); | |
136 | file_desc dup() const { | |
137 | int fd = ::dup(get()); | |
138 | throw_system_error_on(fd == -1, "dup"); | |
139 | return file_desc(fd); | |
140 | } | |
9f95a23c TL |
141 | file_desc accept(socket_address& sa, int flags = 0) { |
142 | auto ret = ::accept4(_fd, &sa.as_posix_sockaddr(), &sa.addr_length, flags); | |
143 | throw_system_error_on(ret == -1, "accept4"); | |
144 | return file_desc(ret); | |
145 | } | |
f67539c2 | 146 | static file_desc inotify_init(int flags); |
9f95a23c | 147 | // return nullopt if no connection is availbale to be accepted |
f67539c2 | 148 | std::optional<file_desc> try_accept(socket_address& sa, int flags = 0) { |
9f95a23c TL |
149 | auto ret = ::accept4(_fd, &sa.as_posix_sockaddr(), &sa.addr_length, flags); |
150 | if (ret == -1 && errno == EAGAIN) { | |
151 | return {}; | |
152 | } | |
11fdf7f2 TL |
153 | throw_system_error_on(ret == -1, "accept4"); |
154 | return file_desc(ret); | |
155 | } | |
156 | void shutdown(int how) { | |
157 | auto ret = ::shutdown(_fd, how); | |
158 | if (ret == -1 && errno != ENOTCONN) { | |
159 | throw_system_error_on(ret == -1, "shutdown"); | |
160 | } | |
161 | } | |
162 | void truncate(size_t size) { | |
163 | auto ret = ::ftruncate(_fd, size); | |
164 | throw_system_error_on(ret, "ftruncate"); | |
165 | } | |
166 | int ioctl(int request) { | |
167 | return ioctl(request, 0); | |
168 | } | |
169 | int ioctl(int request, int value) { | |
170 | int r = ::ioctl(_fd, request, value); | |
171 | throw_system_error_on(r == -1, "ioctl"); | |
172 | return r; | |
173 | } | |
174 | int ioctl(int request, unsigned int value) { | |
175 | int r = ::ioctl(_fd, request, value); | |
176 | throw_system_error_on(r == -1, "ioctl"); | |
177 | return r; | |
178 | } | |
179 | template <class X> | |
180 | int ioctl(int request, X& data) { | |
181 | int r = ::ioctl(_fd, request, &data); | |
182 | throw_system_error_on(r == -1, "ioctl"); | |
183 | return r; | |
184 | } | |
185 | template <class X> | |
186 | int ioctl(int request, X&& data) { | |
187 | int r = ::ioctl(_fd, request, &data); | |
188 | throw_system_error_on(r == -1, "ioctl"); | |
189 | return r; | |
190 | } | |
191 | template <class X> | |
192 | int setsockopt(int level, int optname, X&& data) { | |
193 | int r = ::setsockopt(_fd, level, optname, &data, sizeof(data)); | |
194 | throw_system_error_on(r == -1, "setsockopt"); | |
195 | return r; | |
196 | } | |
197 | int setsockopt(int level, int optname, const char* data) { | |
198 | int r = ::setsockopt(_fd, level, optname, data, strlen(data) + 1); | |
199 | throw_system_error_on(r == -1, "setsockopt"); | |
200 | return r; | |
201 | } | |
f67539c2 TL |
202 | int setsockopt(int level, int optname, const void* data, socklen_t len) { |
203 | int r = ::setsockopt(_fd, level, optname, data, len); | |
204 | throw_system_error_on(r == -1, "setsockopt"); | |
205 | return r; | |
206 | } | |
11fdf7f2 TL |
207 | template <typename Data> |
208 | Data getsockopt(int level, int optname) { | |
209 | Data data; | |
210 | socklen_t len = sizeof(data); | |
211 | memset(&data, 0, len); | |
212 | int r = ::getsockopt(_fd, level, optname, &data, &len); | |
213 | throw_system_error_on(r == -1, "getsockopt"); | |
214 | return data; | |
215 | } | |
216 | int getsockopt(int level, int optname, char* data, socklen_t len) { | |
217 | int r = ::getsockopt(_fd, level, optname, data, &len); | |
218 | throw_system_error_on(r == -1, "getsockopt"); | |
219 | return r; | |
220 | } | |
221 | size_t size() { | |
222 | struct stat buf; | |
223 | auto r = ::fstat(_fd, &buf); | |
224 | throw_system_error_on(r == -1, "fstat"); | |
225 | return buf.st_size; | |
226 | } | |
f67539c2 | 227 | std::optional<size_t> read(void* buffer, size_t len) { |
11fdf7f2 TL |
228 | auto r = ::read(_fd, buffer, len); |
229 | if (r == -1 && errno == EAGAIN) { | |
230 | return {}; | |
231 | } | |
232 | throw_system_error_on(r == -1, "read"); | |
233 | return { size_t(r) }; | |
234 | } | |
f67539c2 | 235 | std::optional<ssize_t> recv(void* buffer, size_t len, int flags) { |
11fdf7f2 TL |
236 | auto r = ::recv(_fd, buffer, len, flags); |
237 | if (r == -1 && errno == EAGAIN) { | |
238 | return {}; | |
239 | } | |
240 | throw_system_error_on(r == -1, "recv"); | |
241 | return { ssize_t(r) }; | |
242 | } | |
f67539c2 | 243 | std::optional<size_t> recvmsg(msghdr* mh, int flags) { |
11fdf7f2 TL |
244 | auto r = ::recvmsg(_fd, mh, flags); |
245 | if (r == -1 && errno == EAGAIN) { | |
246 | return {}; | |
247 | } | |
248 | throw_system_error_on(r == -1, "recvmsg"); | |
249 | return { size_t(r) }; | |
250 | } | |
f67539c2 | 251 | std::optional<size_t> send(const void* buffer, size_t len, int flags) { |
11fdf7f2 TL |
252 | auto r = ::send(_fd, buffer, len, flags); |
253 | if (r == -1 && errno == EAGAIN) { | |
254 | return {}; | |
255 | } | |
256 | throw_system_error_on(r == -1, "send"); | |
257 | return { size_t(r) }; | |
258 | } | |
f67539c2 | 259 | std::optional<size_t> sendto(socket_address& addr, const void* buf, size_t len, int flags) { |
9f95a23c | 260 | auto r = ::sendto(_fd, buf, len, flags, &addr.u.sa, addr.length()); |
11fdf7f2 TL |
261 | if (r == -1 && errno == EAGAIN) { |
262 | return {}; | |
263 | } | |
264 | throw_system_error_on(r == -1, "sendto"); | |
265 | return { size_t(r) }; | |
266 | } | |
f67539c2 | 267 | std::optional<size_t> sendmsg(const msghdr* msg, int flags) { |
11fdf7f2 TL |
268 | auto r = ::sendmsg(_fd, msg, flags); |
269 | if (r == -1 && errno == EAGAIN) { | |
270 | return {}; | |
271 | } | |
272 | throw_system_error_on(r == -1, "sendmsg"); | |
273 | return { size_t(r) }; | |
274 | } | |
275 | void bind(sockaddr& sa, socklen_t sl) { | |
276 | auto r = ::bind(_fd, &sa, sl); | |
277 | throw_system_error_on(r == -1, "bind"); | |
278 | } | |
279 | void connect(sockaddr& sa, socklen_t sl) { | |
280 | auto r = ::connect(_fd, &sa, sl); | |
281 | if (r == -1 && errno == EINPROGRESS) { | |
282 | return; | |
283 | } | |
284 | throw_system_error_on(r == -1, "connect"); | |
285 | } | |
286 | socket_address get_address() { | |
287 | socket_address addr; | |
9f95a23c | 288 | auto r = ::getsockname(_fd, &addr.u.sa, &addr.addr_length); |
11fdf7f2 TL |
289 | throw_system_error_on(r == -1, "getsockname"); |
290 | return addr; | |
291 | } | |
292 | void listen(int backlog) { | |
293 | auto fd = ::listen(_fd, backlog); | |
294 | throw_system_error_on(fd == -1, "listen"); | |
295 | } | |
f67539c2 | 296 | std::optional<size_t> write(const void* buf, size_t len) { |
11fdf7f2 TL |
297 | auto r = ::write(_fd, buf, len); |
298 | if (r == -1 && errno == EAGAIN) { | |
299 | return {}; | |
300 | } | |
301 | throw_system_error_on(r == -1, "write"); | |
302 | return { size_t(r) }; | |
303 | } | |
f67539c2 | 304 | std::optional<size_t> writev(const iovec *iov, int iovcnt) { |
11fdf7f2 TL |
305 | auto r = ::writev(_fd, iov, iovcnt); |
306 | if (r == -1 && errno == EAGAIN) { | |
307 | return {}; | |
308 | } | |
309 | throw_system_error_on(r == -1, "writev"); | |
310 | return { size_t(r) }; | |
311 | } | |
312 | size_t pread(void* buf, size_t len, off_t off) { | |
313 | auto r = ::pread(_fd, buf, len, off); | |
314 | throw_system_error_on(r == -1, "pread"); | |
315 | return size_t(r); | |
316 | } | |
317 | void timerfd_settime(int flags, const itimerspec& its) { | |
318 | auto r = ::timerfd_settime(_fd, flags, &its, NULL); | |
319 | throw_system_error_on(r == -1, "timerfd_settime"); | |
320 | } | |
321 | ||
322 | mmap_area map(size_t size, unsigned prot, unsigned flags, size_t offset, | |
323 | void* addr = nullptr) { | |
324 | void *x = mmap(addr, size, prot, flags, _fd, offset); | |
325 | throw_system_error_on(x == MAP_FAILED, "mmap"); | |
326 | return mmap_area(static_cast<char*>(x), mmap_deleter{size}); | |
327 | } | |
328 | ||
329 | mmap_area map_shared_rw(size_t size, size_t offset) { | |
330 | return map(size, PROT_READ | PROT_WRITE, MAP_SHARED, offset); | |
331 | } | |
332 | ||
333 | mmap_area map_shared_ro(size_t size, size_t offset) { | |
334 | return map(size, PROT_READ, MAP_SHARED, offset); | |
335 | } | |
336 | ||
337 | mmap_area map_private_rw(size_t size, size_t offset) { | |
338 | return map(size, PROT_READ | PROT_WRITE, MAP_PRIVATE, offset); | |
339 | } | |
340 | ||
341 | mmap_area map_private_ro(size_t size, size_t offset) { | |
342 | return map(size, PROT_READ, MAP_PRIVATE, offset); | |
343 | } | |
344 | ||
1e59de90 TL |
345 | void spawn_actions_add_close(posix_spawn_file_actions_t* actions) { |
346 | auto r = ::posix_spawn_file_actions_addclose(actions, _fd); | |
347 | throw_pthread_error(r); | |
348 | } | |
349 | ||
350 | void spawn_actions_add_dup2(posix_spawn_file_actions_t* actions, int newfd) { | |
351 | auto r = ::posix_spawn_file_actions_adddup2(actions, _fd, newfd); | |
352 | throw_pthread_error(r); | |
353 | } | |
11fdf7f2 TL |
354 | private: |
355 | file_desc(int fd) : _fd(fd) {} | |
356 | }; | |
357 | ||
358 | ||
namespace posix {

/// Bit for the receive direction in a shutdown mask.
static constexpr unsigned rcv_shutdown = 0x1;
/// Bit for the send direction in a shutdown mask.
static constexpr unsigned snd_shutdown = 0x2;
/// Returns `how + 1`.
///
/// NOTE(review): presumably `how` is a `shutdown(2)` argument; with the Linux
/// values SHUT_RD=0, SHUT_WR=1, SHUT_RDWR=2 the result is `rcv_shutdown`,
/// `snd_shutdown` and their union respectively.
static inline constexpr unsigned shutdown_mask(int how) { return how + 1; }

/// Converts a duration value to a `timespec`
///
/// The result is normalised so that `0 <= tv_nsec < 1'000'000'000`; for a
/// negative duration with a fractional second this means `tv_sec` is rounded
/// towards negative infinity (e.g. -1.5s becomes `{-2, 500000000}`).
///
/// \param d a duration value to convert to the POSIX `timespec` format
/// \return `d` as a `timespec` value
template <typename Rep, typename Period>
struct timespec
to_timespec(std::chrono::duration<Rep, Period> d) {
    constexpr long long ns_per_sec = 1000000000;
    const long long ns = std::chrono::duration_cast<std::chrono::nanoseconds>(d).count();
    long long sec = ns / ns_per_sec;
    long long rem = ns % ns_per_sec;
    // Integer division truncates towards zero, which would leave a negative
    // remainder for negative inputs; borrow one second to keep tv_nsec valid.
    if (rem < 0) {
        rem += ns_per_sec;
        --sec;
    }
    struct timespec ts {};
    ts.tv_sec = sec;
    ts.tv_nsec = rem;
    return ts;
}

/// Converts a relative start time and an interval to an `itimerspec`
///
/// \param base First expiration of the timer, relative to the current time
/// \param interval period for re-arming the timer
/// \return `base` and `interval` converted to an `itimerspec`
template <typename Rep1, typename Period1, typename Rep2, typename Period2>
struct itimerspec
to_relative_itimerspec(std::chrono::duration<Rep1, Period1> base, std::chrono::duration<Rep2, Period2> interval) {
    struct itimerspec its {};
    its.it_interval = to_timespec(interval);
    its.it_value = to_timespec(base);
    return its;
}


/// Converts a time_point and a duration to an `itimerspec`
///
/// The first expiration is `base.time_since_epoch()`.
///
/// \param base base time for the timer; must use the same clock as the timer
/// \param interval period for re-arming the timer
/// \return `base` and `interval` converted to an `itimerspec`
template <typename Clock, class Duration, class Rep, class Period>
struct itimerspec
to_absolute_itimerspec(std::chrono::time_point<Clock, Duration> base, std::chrono::duration<Rep, Period> interval) {
    return to_relative_itimerspec(base.time_since_epoch(), interval);
}

}
406 | ||
407 | class posix_thread { | |
408 | public: | |
409 | class attr; | |
410 | private: | |
411 | // must allocate, since this class is moveable | |
412 | std::unique_ptr<std::function<void ()>> _func; | |
413 | pthread_t _pthread; | |
414 | bool _valid = true; | |
415 | mmap_area _stack; | |
416 | private: | |
417 | static void* start_routine(void* arg) noexcept; | |
418 | public: | |
419 | posix_thread(std::function<void ()> func); | |
420 | posix_thread(attr a, std::function<void ()> func); | |
421 | posix_thread(posix_thread&& x); | |
422 | ~posix_thread(); | |
423 | void join(); | |
424 | public: | |
425 | class attr { | |
426 | public: | |
427 | struct stack_size { size_t size = 0; }; | |
428 | attr() = default; | |
429 | template <typename... A> | |
430 | attr(A... a) { | |
431 | set(std::forward<A>(a)...); | |
432 | } | |
433 | void set() {} | |
434 | template <typename A, typename... Rest> | |
435 | void set(A a, Rest... rest) { | |
436 | set(std::forward<A>(a)); | |
437 | set(std::forward<Rest>(rest)...); | |
438 | } | |
439 | void set(stack_size ss) { _stack_size = ss; } | |
440 | private: | |
441 | stack_size _stack_size; | |
442 | friend class posix_thread; | |
443 | }; | |
444 | }; | |
445 | ||
446 | ||
447 | inline | |
448 | void throw_system_error_on(bool condition, const char* what_arg) { | |
449 | if (condition) { | |
9f95a23c TL |
450 | if ((errno == EBADF || errno == ENOTSOCK) && is_abort_on_ebadf_enabled()) { |
451 | abort(); | |
452 | } | |
11fdf7f2 TL |
453 | throw std::system_error(errno, std::system_category(), what_arg); |
454 | } | |
455 | } | |
456 | ||
457 | template <typename T> | |
458 | inline | |
459 | void throw_kernel_error(T r) { | |
460 | static_assert(std::is_signed<T>::value, "kernel error variables must be signed"); | |
461 | if (r < 0) { | |
9f95a23c TL |
462 | auto ec = -r; |
463 | if ((ec == EBADF || ec == ENOTSOCK) && is_abort_on_ebadf_enabled()) { | |
464 | abort(); | |
465 | } | |
11fdf7f2 TL |
466 | throw std::system_error(-r, std::system_category()); |
467 | } | |
468 | } | |
469 | ||
/// Throws `std::system_error` when \c r is non-zero, using \c r itself as
/// the error code; does nothing when \c r is zero.
///
/// Intended for calls that return the error number directly instead of
/// setting `errno` (it is used here for the `posix_spawn_file_actions_*`
/// wrappers).
template <typename T>
inline
void throw_pthread_error(T r) {
    if (r != 0) {
        throw std::system_error(r, std::system_category());
    }
}
477 | ||
/// Returns a signal set containing only \c signo.
///
/// The result of `sigaddset()` is not checked.
inline
sigset_t make_sigset_mask(int signo) {
    sigset_t mask;
    sigemptyset(&mask);
    sigaddset(&mask, signo);
    return mask;
}
485 | ||
/// Returns a signal set initialised with `sigfillset()`.
inline
sigset_t make_full_sigset_mask() {
    sigset_t mask;
    sigfillset(&mask);
    return mask;
}
492 | ||
/// Returns a signal set initialised with `sigemptyset()`.
inline
sigset_t make_empty_sigset_mask() {
    sigset_t mask;
    sigemptyset(&mask);
    return mask;
}
499 | ||
/// Restricts the calling thread's CPU affinity to the single CPU \c cpu_id
/// using `pthread_setaffinity_np()`.
///
/// Failure is only detected through `assert`; when assertions are compiled
/// out the result is ignored.
inline
void pin_this_thread(unsigned cpu_id) {
    cpu_set_t cs;
    CPU_ZERO(&cs);
    CPU_SET(cpu_id, &cs);
    auto r = pthread_setaffinity_np(pthread_self(), sizeof(cs), &cs);
    assert(r == 0);
    // Keeps `r` referenced when assert() expands to nothing.
    (void)r;
}
509 | ||
1e59de90 TL |
510 | std::set<unsigned> get_current_cpuset(); |
511 | ||
11fdf7f2 TL |
512 | /// @} |
513 | ||
514 | } |