]> git.proxmox.com Git - ceph.git/blame - ceph/src/seastar/src/net/posix-stack.cc
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / seastar / src / net / posix-stack.cc
CommitLineData
11fdf7f2
TL
1/*
2 * This file is open source software, licensed to you under the terms
3 * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
4 * distributed with this work for additional information regarding copyright
5 * ownership. You may not use this file except in compliance with the License.
6 *
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing,
12 * software distributed under the License is distributed on an
13 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 * KIND, either express or implied. See the License for the
15 * specific language governing permissions and limitations
16 * under the License.
17 */
18/*
19 * Copyright (C) 2014 Cloudius Systems, Ltd.
20 */
21
#include <cstring>
#include <random>
#include <type_traits>

#include <sys/socket.h>
#include <linux/if.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <net/route.h>

#include <seastar/core/loop.hh>
#include <seastar/core/reactor.hh>
#include <seastar/net/posix-stack.hh>
#include <seastar/net/net.hh>
#include <seastar/net/packet.hh>
#include <seastar/net/api.hh>
#include <seastar/net/inet_address.hh>
#include <seastar/util/std-compat.hh>
#include <netinet/tcp.h>
#include <netinet/sctp.h>
40
9f95a23c
TL
41namespace std {
42
43template <>
44struct hash<seastar::net::posix_ap_server_socket_impl::protocol_and_socket_address> {
45 size_t operator()(const seastar::net::posix_ap_server_socket_impl::protocol_and_socket_address& t_sa) const {
46 auto h1 = std::hash<int>()(std::get<0>(t_sa));
47 auto h2 = std::hash<seastar::net::socket_address>()(std::get<1>(t_sa));
48 return h1 ^ h2;
49 }
50};
51
52}
53
f67539c2
TL
54
55namespace {
56
57// reinterpret_cast<foo*>() on a pointer that the compiler knows points to an
58// object with a different type is disliked by the compiler as it violates
59// strict aliasing rules. This safe version does the same thing but keeps the
60// compiler happy.
/// Reads a T out of the raw bytes at `ptr` by copying them into a fresh object.
///
/// \tparam T  type to materialise; must be trivially copyable, because the
///            value is produced with memcpy rather than by construction.
/// \param ptr start of at least sizeof(T) readable bytes; no alignment is required.
/// \return    a T whose object representation equals those bytes.
template <typename T>
T
copy_reinterpret_cast(const void* ptr) {
    static_assert(std::is_trivially_copyable<T>::value,
                  "copy_reinterpret_cast requires a trivially copyable type");
    T tmp;
    std::memcpy(&tmp, ptr, sizeof(T));
    return tmp;
}
68
69}
70
11fdf7f2
TL
71namespace seastar {
72
73namespace net {
74
75using namespace seastar;
76
9f95a23c
TL
77class posix_connected_socket_operations {
78public:
79 virtual ~posix_connected_socket_operations() = default;
80 virtual void set_nodelay(file_desc& fd, bool nodelay) const = 0;
81 virtual bool get_nodelay(file_desc& fd) const = 0;
82 virtual void set_keepalive(file_desc& _fd, bool keepalive) const = 0;
83 virtual bool get_keepalive(file_desc& _fd) const = 0;
84 virtual void set_keepalive_parameters(file_desc& _fd, const keepalive_params& params) const = 0;
85 virtual keepalive_params get_keepalive_parameters(file_desc& _fd) const = 0;
f67539c2
TL
86 virtual void set_sockopt(file_desc& _fd, int level, int optname, const void* data, size_t len) const {
87 _fd.setsockopt(level, optname, data, socklen_t(len));
88 }
89 virtual int get_sockopt(file_desc& _fd, int level, int optname, void* data, size_t len) const {
90 return _fd.getsockopt(level, optname, reinterpret_cast<char*>(data), socklen_t(len));
91 }
20effc67
TL
92 virtual socket_address local_address(file_desc& _fd) const {
93 return _fd.get_address();
94 }
9f95a23c
TL
95};
96
97thread_local posix_ap_server_socket_impl::sockets_map_t posix_ap_server_socket_impl::sockets{};
98thread_local posix_ap_server_socket_impl::conn_map_t posix_ap_server_socket_impl::conn_q{};
11fdf7f2 99
9f95a23c 100class posix_tcp_connected_socket_operations : public posix_connected_socket_operations {
11fdf7f2 101public:
9f95a23c 102 virtual void set_nodelay(file_desc& _fd, bool nodelay) const override {
11fdf7f2
TL
103 _fd.setsockopt(IPPROTO_TCP, TCP_NODELAY, int(nodelay));
104 }
9f95a23c 105 virtual bool get_nodelay(file_desc& _fd) const override {
11fdf7f2
TL
106 return _fd.getsockopt<int>(IPPROTO_TCP, TCP_NODELAY);
107 }
9f95a23c 108 virtual void set_keepalive(file_desc& _fd, bool keepalive) const override {
11fdf7f2
TL
109 _fd.setsockopt(SOL_SOCKET, SO_KEEPALIVE, int(keepalive));
110 }
9f95a23c 111 virtual bool get_keepalive(file_desc& _fd) const override {
11fdf7f2
TL
112 return _fd.getsockopt<int>(SOL_SOCKET, SO_KEEPALIVE);
113 }
9f95a23c 114 virtual void set_keepalive_parameters(file_desc& _fd, const keepalive_params& params) const override {
f67539c2 115 const tcp_keepalive_params& pms = std::get<tcp_keepalive_params>(params);
11fdf7f2
TL
116 _fd.setsockopt(IPPROTO_TCP, TCP_KEEPCNT, pms.count);
117 _fd.setsockopt(IPPROTO_TCP, TCP_KEEPIDLE, int(pms.idle.count()));
118 _fd.setsockopt(IPPROTO_TCP, TCP_KEEPINTVL, int(pms.interval.count()));
119 }
9f95a23c 120 virtual keepalive_params get_keepalive_parameters(file_desc& _fd) const override {
11fdf7f2
TL
121 return tcp_keepalive_params {
122 std::chrono::seconds(_fd.getsockopt<int>(IPPROTO_TCP, TCP_KEEPIDLE)),
123 std::chrono::seconds(_fd.getsockopt<int>(IPPROTO_TCP, TCP_KEEPINTVL)),
124 _fd.getsockopt<unsigned>(IPPROTO_TCP, TCP_KEEPCNT)
125 };
126 }
127};
128
9f95a23c 129class posix_sctp_connected_socket_operations : public posix_connected_socket_operations {
11fdf7f2 130public:
9f95a23c 131 virtual void set_nodelay(file_desc& _fd, bool nodelay) const override {
11fdf7f2
TL
132 _fd.setsockopt(SOL_SCTP, SCTP_NODELAY, int(nodelay));
133 }
9f95a23c 134 virtual bool get_nodelay(file_desc& _fd) const override {
11fdf7f2
TL
135 return _fd.getsockopt<int>(SOL_SCTP, SCTP_NODELAY);
136 }
9f95a23c 137 virtual void set_keepalive(file_desc& _fd, bool keepalive) const override {
11fdf7f2
TL
138 auto heartbeat = _fd.getsockopt<sctp_paddrparams>(SOL_SCTP, SCTP_PEER_ADDR_PARAMS);
139 if (keepalive) {
140 heartbeat.spp_flags |= SPP_HB_ENABLE;
141 } else {
142 heartbeat.spp_flags &= ~SPP_HB_ENABLE;
143 }
144 _fd.setsockopt(SOL_SCTP, SCTP_PEER_ADDR_PARAMS, heartbeat);
145 }
9f95a23c 146 virtual bool get_keepalive(file_desc& _fd) const override {
11fdf7f2
TL
147 return _fd.getsockopt<sctp_paddrparams>(SOL_SCTP, SCTP_PEER_ADDR_PARAMS).spp_flags & SPP_HB_ENABLE;
148 }
9f95a23c 149 virtual void set_keepalive_parameters(file_desc& _fd, const keepalive_params& kpms) const override {
f67539c2 150 const sctp_keepalive_params& pms = std::get<sctp_keepalive_params>(kpms);
11fdf7f2
TL
151 auto params = _fd.getsockopt<sctp_paddrparams>(SOL_SCTP, SCTP_PEER_ADDR_PARAMS);
152 params.spp_hbinterval = pms.interval.count() * 1000; // in milliseconds
153 params.spp_pathmaxrxt = pms.count;
154 _fd.setsockopt(SOL_SCTP, SCTP_PEER_ADDR_PARAMS, params);
155 }
9f95a23c 156 virtual keepalive_params get_keepalive_parameters(file_desc& _fd) const override {
11fdf7f2
TL
157 auto params = _fd.getsockopt<sctp_paddrparams>(SOL_SCTP, SCTP_PEER_ADDR_PARAMS);
158 return sctp_keepalive_params {
159 std::chrono::seconds(params.spp_hbinterval/1000), // in seconds
160 params.spp_pathmaxrxt
161 };
162 }
163};
164
9f95a23c
TL
165class posix_unix_stream_connected_socket_operations : public posix_connected_socket_operations {
166public:
167 virtual void set_nodelay(file_desc& fd, bool nodelay) const override {
168 assert(nodelay); // make sure nobody actually tries to use this non-existing functionality
169 }
170 virtual bool get_nodelay(file_desc& fd) const override {
171 return true;
172 }
173 virtual void set_keepalive(file_desc& fd, bool keepalive) const override {}
174 virtual bool get_keepalive(file_desc& fd) const override {
175 return false;
176 }
177 virtual void set_keepalive_parameters(file_desc& fd, const keepalive_params& p) const override {}
178 virtual keepalive_params get_keepalive_parameters(file_desc& fd) const override {
179 return keepalive_params{};
180 }
181};
182
183static const posix_connected_socket_operations*
184get_posix_connected_socket_ops(sa_family_t family, int protocol) {
185 static posix_tcp_connected_socket_operations tcp_ops;
186 static posix_sctp_connected_socket_operations sctp_ops;
187 static posix_unix_stream_connected_socket_operations unix_ops;
188 switch (family) {
189 case AF_INET:
190 case AF_INET6:
191 switch (protocol) {
192 case IPPROTO_TCP: return &tcp_ops;
193 case IPPROTO_SCTP: return &sctp_ops;
194 default: abort();
195 }
196 case AF_UNIX:
197 return &unix_ops;
198 default:
199 abort();
200 }
201}
202
1e59de90
TL
203static void shutdown_socket_fd(pollable_fd& fd, int how) noexcept {
204 try {
205 // file_desc::shutdown ignores ENOTCONN. Other reasons for exception
206 // EINVAL (wrong "how") -- impossible
207 // ENOTSOCK (not a socket) -- incredible
208 // EBADF (invalid file descriptor) -- irretrievable
209 fd.shutdown(how);
210 } catch (...) {
211 on_internal_error(seastar_logger, format("socket shutdown({}, {}) failed: {}", fd.get_file_desc().fdinfo(), how, std::current_exception()));
212 }
213}
214
9f95a23c 215class posix_connected_socket_impl final : public connected_socket_impl {
f67539c2 216 pollable_fd _fd;
9f95a23c 217 const posix_connected_socket_operations* _ops;
11fdf7f2 218 conntrack::handle _handle;
f67539c2 219 std::pmr::polymorphic_allocator<char>* _allocator;
11fdf7f2 220private:
f67539c2 221 explicit posix_connected_socket_impl(sa_family_t family, int protocol, pollable_fd fd, std::pmr::polymorphic_allocator<char>* allocator=memory::malloc_allocator) :
9f95a23c 222 _fd(std::move(fd)), _ops(get_posix_connected_socket_ops(family, protocol)), _allocator(allocator) {}
f67539c2
TL
223 explicit posix_connected_socket_impl(sa_family_t family, int protocol, pollable_fd fd, conntrack::handle&& handle,
224 std::pmr::polymorphic_allocator<char>* allocator=memory::malloc_allocator) : _fd(std::move(fd))
9f95a23c 225 , _ops(get_posix_connected_socket_ops(family, protocol)), _handle(std::move(handle)), _allocator(allocator) {}
11fdf7f2
TL
226public:
227 virtual data_source source() override {
f67539c2
TL
228 return source(connected_socket_input_stream_config());
229 }
230 virtual data_source source(connected_socket_input_stream_config csisc) override {
231 return data_source(std::make_unique<posix_data_source_impl>(_fd, csisc, _allocator));
11fdf7f2
TL
232 }
233 virtual data_sink sink() override {
234 return data_sink(std::make_unique< posix_data_sink_impl>(_fd));
235 }
236 virtual void shutdown_input() override {
1e59de90 237 shutdown_socket_fd(_fd, SHUT_RD);
11fdf7f2
TL
238 }
239 virtual void shutdown_output() override {
1e59de90 240 shutdown_socket_fd(_fd, SHUT_WR);
11fdf7f2
TL
241 }
242 virtual void set_nodelay(bool nodelay) override {
f67539c2 243 return _ops->set_nodelay(_fd.get_file_desc(), nodelay);
11fdf7f2
TL
244 }
245 virtual bool get_nodelay() const override {
f67539c2 246 return _ops->get_nodelay(_fd.get_file_desc());
11fdf7f2
TL
247 }
248 void set_keepalive(bool keepalive) override {
f67539c2 249 return _ops->set_keepalive(_fd.get_file_desc(), keepalive);
11fdf7f2
TL
250 }
251 bool get_keepalive() const override {
f67539c2 252 return _ops->get_keepalive(_fd.get_file_desc());
11fdf7f2
TL
253 }
254 void set_keepalive_parameters(const keepalive_params& p) override {
f67539c2 255 return _ops->set_keepalive_parameters(_fd.get_file_desc(), p);
11fdf7f2
TL
256 }
257 keepalive_params get_keepalive_parameters() const override {
f67539c2
TL
258 return _ops->get_keepalive_parameters(_fd.get_file_desc());
259 }
260 void set_sockopt(int level, int optname, const void* data, size_t len) override {
261 return _ops->set_sockopt(_fd.get_file_desc(), level, optname, data, len);
262 }
263 int get_sockopt(int level, int optname, void* data, size_t len) const override {
264 return _ops->get_sockopt(_fd.get_file_desc(), level, optname, data, len);
11fdf7f2 265 }
20effc67
TL
266 socket_address local_address() const noexcept override {
267 return _ops->local_address(_fd.get_file_desc());
268 }
1e59de90
TL
269 future<> wait_input_shutdown() override {
270 return _fd.poll_rdhup();
271 }
20effc67 272
9f95a23c
TL
273 friend class posix_server_socket_impl;
274 friend class posix_ap_server_socket_impl;
275 friend class posix_reuseport_server_socket_impl;
11fdf7f2
TL
276 friend class posix_network_stack;
277 friend class posix_ap_network_stack;
278 friend class posix_socket_impl;
279};
9f95a23c
TL
280
281static void resolve_outgoing_address(socket_address& a) {
282 if (a.family() != AF_INET6
283 || a.as_posix_sockaddr_in6().sin6_scope_id != inet_address::invalid_scope
284 || !IN6_IS_ADDR_LINKLOCAL(&a.as_posix_sockaddr_in6().sin6_addr)
285 ) {
286 return;
287 }
288
289 FILE *f;
290
291 if (!(f = fopen("/proc/net/ipv6_route", "r"))) {
292 throw std::system_error(errno, std::system_category(), "resolve_address");
293 }
294
295 auto holder = std::unique_ptr<FILE, int(*)(FILE *)>(f, &::fclose);
296
297 /**
298 Here all configured IPv6 routes are shown in a special format. The example displays for loopback interface only. The meaning is shown below (see net/ipv6/route.c for more).
299
300 # cat /proc/net/ipv6_route
301 00000000000000000000000000000000 00 00000000000000000000000000000000 00 00000000000000000000000000000000 ffffffff 00000001 00000001 00200200 lo
302 +------------------------------+ ++ +------------------------------+ ++ +------------------------------+ +------+ +------+ +------+ +------+ ++
303 | | | | | | | | | |
304 1 2 3 4 5 6 7 8 9 10
305
306 1: IPv6 destination network displayed in 32 hexadecimal chars without colons as separator
307
308 2: IPv6 destination prefix length in hexadecimal
309
310 3: IPv6 source network displayed in 32 hexadecimal chars without colons as separator
311
312 4: IPv6 source prefix length in hexadecimal
313
314 5: IPv6 next hop displayed in 32 hexadecimal chars without colons as separator
315
316 6: Metric in hexadecimal
317
318 7: Reference counter
319
320 8: Use counter
321
322 9: Flags
323
324 10: Device name
325
326 */
327
328 uint32_t prefix_len, src_prefix_len;
329 unsigned long flags;
330 char device[16];
331 char dest_str[40];
332
333 for (;;) {
334 auto n = fscanf(f, "%4s%4s%4s%4s%4s%4s%4s%4s %02x "
335 "%*4s%*4s%*4s%*4s%*4s%*4s%*4s%*4s %02x "
336 "%*4s%*4s%*4s%*4s%*4s%*4s%*4s%*4s "
337 "%*08x %*08x %*08x %08lx %8s",
338 &dest_str[0], &dest_str[5], &dest_str[10], &dest_str[15],
339 &dest_str[20], &dest_str[25], &dest_str[30], &dest_str[35],
340 &prefix_len,
341 &src_prefix_len,
342 &flags, device);
343 if (n != 12) {
344 break;
345 }
346
347 if ((prefix_len > 128) || (src_prefix_len != 0)
348 || (flags & (RTF_POLICY | RTF_FLOW))
349 || ((flags & RTF_REJECT) && prefix_len == 0) /* reject all */) {
350 continue;
351 }
352
353 dest_str[4] = dest_str[9] = dest_str[14] = dest_str[19] = dest_str[24] = dest_str[29] = dest_str[34] = ':';
354 dest_str[39] = '\0';
355
356 struct in6_addr addr;
357 if (inet_pton(AF_INET6, dest_str, &addr) < 0) {
358 /* not an Ipv6 address */
359 continue;
360 }
361
362 auto bytes = prefix_len / 8;
363 auto bits = prefix_len % 8;
364
365 auto& src = a.as_posix_sockaddr_in6().sin6_addr;
366
367 if (bytes > 0 && memcmp(&src, &addr, bytes)) {
368 continue;
369 }
370 if (bits > 0) {
371 auto c1 = src.s6_addr[bytes];
372 auto c2 = addr.s6_addr[bytes];
373 auto mask = 0xffu << (8 - bits);
374 if ((c1 & mask) != (c2 & mask)) {
375 continue;
376 }
377 }
378
379 // found the route.
380 for (auto& nif : engine().net().network_interfaces()) {
381 if (nif.name() == device || nif.display_name() == device) {
382 a.as_posix_sockaddr_in6().sin6_scope_id = nif.index();
383 return;
384 }
385 }
386 }
387}
11fdf7f2
TL
388
389class posix_socket_impl final : public socket_impl {
f67539c2
TL
390 pollable_fd _fd;
391 std::pmr::polymorphic_allocator<char>* _allocator;
9f95a23c 392 bool _reuseaddr = false;
11fdf7f2
TL
393
394 future<> find_port_and_connect(socket_address sa, socket_address local, transport proto = transport::TCP) {
395 static thread_local std::default_random_engine random_engine{std::random_device{}()};
396 static thread_local std::uniform_int_distribution<uint16_t> u(49152/smp::count + 1, 65535/smp::count - 1);
9f95a23c
TL
397 // If no explicit local address, set to dest address family wildcard.
398 if (local.is_unspecified()) {
399 local = net::inet_address(sa.addr().in_family());
400 }
401 resolve_outgoing_address(sa);
11fdf7f2 402 return repeat([this, sa, local, proto, attempts = 0, requested_port = ntoh(local.as_posix_sockaddr_in().sin_port)] () mutable {
9f95a23c 403 _fd = engine().make_pollable_fd(sa, int(proto));
f67539c2
TL
404 _fd.get_file_desc().setsockopt(SOL_SOCKET, SO_REUSEADDR, int(_reuseaddr));
405 uint16_t port = attempts++ < 5 && requested_port == 0 && proto == transport::TCP ? u(random_engine) * smp::count + this_shard_id() : requested_port;
11fdf7f2 406 local.as_posix_sockaddr_in().sin_port = hton(port);
f67539c2 407 return futurize_invoke([this, sa, local] { return engine().posix_connect(_fd, sa, local); }).then_wrapped([port, requested_port] (future<> f) {
11fdf7f2
TL
408 try {
409 f.get();
410 return stop_iteration::yes;
411 } catch (std::system_error& err) {
9f95a23c 412 if (port != requested_port && (err.code().value() == EADDRINUSE || err.code().value() == EADDRNOTAVAIL)) {
11fdf7f2
TL
413 return stop_iteration::no;
414 }
415 throw;
416 }
417 });
418 });
419 }
420
9f95a23c
TL
421 /// an aux function to handle unix-domain-specific requests
422 future<connected_socket> connect_unix_domain(socket_address sa, socket_address local) {
423 // note that if the 'local' address was not set by the client, it was created as an undefined address
424 if (local.is_unspecified()) {
425 local = socket_address{unix_domain_addr{std::string{}}};
426 }
427
428 _fd = engine().make_pollable_fd(sa, 0);
429 return engine().posix_connect(_fd, sa, local).then(
430 [fd = _fd, allocator = _allocator](){
431 // a problem with 'private' interaction with 'unique_ptr'
432 std::unique_ptr<connected_socket_impl> csi;
433 csi.reset(new posix_connected_socket_impl{AF_UNIX, 0, std::move(fd), allocator});
434 return make_ready_future<connected_socket>(connected_socket(std::move(csi)));
435 }
436 );
437 }
438
11fdf7f2 439public:
f67539c2 440 explicit posix_socket_impl(std::pmr::polymorphic_allocator<char>* allocator=memory::malloc_allocator) : _allocator(allocator) {}
11fdf7f2
TL
441
442 virtual future<connected_socket> connect(socket_address sa, socket_address local, transport proto = transport::TCP) override {
9f95a23c
TL
443 if (sa.is_af_unix()) {
444 return connect_unix_domain(sa, local);
445 }
446 return find_port_and_connect(sa, local, proto).then([this, sa, proto, allocator = _allocator] () mutable {
11fdf7f2 447 std::unique_ptr<connected_socket_impl> csi;
9f95a23c 448 csi.reset(new posix_connected_socket_impl(sa.family(), static_cast<int>(proto), _fd, allocator));
11fdf7f2
TL
449 return make_ready_future<connected_socket>(connected_socket(std::move(csi)));
450 });
451 }
452
9f95a23c
TL
453 void set_reuseaddr(bool reuseaddr) override {
454 _reuseaddr = reuseaddr;
455 if (_fd) {
f67539c2 456 _fd.get_file_desc().setsockopt(SOL_SOCKET, SO_REUSEADDR, int(reuseaddr));
9f95a23c
TL
457 }
458 }
459
460 bool get_reuseaddr() const override {
461 if(_fd) {
f67539c2 462 return _fd.get_file_desc().getsockopt<int>(SOL_SOCKET, SO_REUSEADDR);
9f95a23c
TL
463 } else {
464 return _reuseaddr;
465 }
466 }
467
11fdf7f2
TL
468 virtual void shutdown() override {
469 if (_fd) {
470 try {
f67539c2 471 _fd.shutdown(SHUT_RDWR);
11fdf7f2
TL
472 } catch (std::system_error& e) {
473 if (e.code().value() != ENOTCONN) {
474 throw;
475 }
476 }
477 }
478 }
479};
480
9f95a23c
TL
481future<accept_result>
482posix_server_socket_impl::accept() {
483 return _lfd.accept().then([this] (std::tuple<pollable_fd, socket_address> fd_sa) {
484 auto& fd = std::get<0>(fd_sa);
485 auto& sa = std::get<1>(fd_sa);
486 auto cth = [this, &sa] {
487 switch(_lba) {
488 case server_socket::load_balancing_algorithm::connection_distribution:
489 return _conntrack.get_handle();
490 case server_socket::load_balancing_algorithm::port:
491 return _conntrack.get_handle(ntoh(sa.as_posix_sockaddr_in().sin_port) % smp::count);
492 case server_socket::load_balancing_algorithm::fixed:
493 return _conntrack.get_handle(_fixed_cpu);
494 default: abort();
495 }
496 } ();
11fdf7f2 497 auto cpu = cth.cpu();
f67539c2 498 if (cpu == this_shard_id()) {
11fdf7f2 499 std::unique_ptr<connected_socket_impl> csi(
f67539c2 500 new posix_connected_socket_impl(sa.family(), _protocol, std::move(fd), std::move(cth), _allocator));
9f95a23c
TL
501 return make_ready_future<accept_result>(
502 accept_result{connected_socket(std::move(csi)), sa});
11fdf7f2 503 } else {
9f95a23c
TL
504 // FIXME: future is discarded
505 (void)smp::submit_to(cpu, [protocol = _protocol, ssa = _sa, fd = std::move(fd.get_file_desc()), sa, cth = std::move(cth), allocator = _allocator] () mutable {
506 posix_ap_server_socket_impl::move_connected_socket(protocol, ssa, pollable_fd(std::move(fd)), sa, std::move(cth), allocator);
11fdf7f2
TL
507 });
508 return accept();
509 }
510 });
511}
512
11fdf7f2 513void
9f95a23c 514posix_server_socket_impl::abort_accept() {
1e59de90 515 _lfd.shutdown(SHUT_RD, pollable_fd::shutdown_kernel_only::no);
11fdf7f2
TL
516}
517
9f95a23c
TL
518socket_address posix_server_socket_impl::local_address() const {
519 return _lfd.get_file_desc().get_address();
520}
521
522future<accept_result> posix_ap_server_socket_impl::accept() {
523 auto t_sa = std::make_tuple(_protocol, _sa);
524 auto conni = conn_q.find(t_sa);
11fdf7f2
TL
525 if (conni != conn_q.end()) {
526 connection c = std::move(conni->second);
527 conn_q.erase(conni);
528 try {
529 std::unique_ptr<connected_socket_impl> csi(
f67539c2 530 new posix_connected_socket_impl(_sa.family(), _protocol, std::move(c.fd), std::move(c.connection_tracking_handle), _allocator));
9f95a23c 531 return make_ready_future<accept_result>(accept_result{connected_socket(std::move(csi)), std::move(c.addr)});
11fdf7f2 532 } catch (...) {
9f95a23c 533 return make_exception_future<accept_result>(std::current_exception());
11fdf7f2
TL
534 }
535 } else {
536 try {
9f95a23c 537 auto i = sockets.emplace(std::piecewise_construct, std::make_tuple(t_sa), std::make_tuple());
11fdf7f2
TL
538 assert(i.second);
539 return i.first->second.get_future();
540 } catch (...) {
9f95a23c 541 return make_exception_future<accept_result>(std::current_exception());
11fdf7f2
TL
542 }
543 }
544}
545
11fdf7f2 546void
9f95a23c
TL
547posix_ap_server_socket_impl::abort_accept() {
548 auto t_sa = std::make_tuple(_protocol, _sa);
549 conn_q.erase(t_sa);
550 auto i = sockets.find(t_sa);
11fdf7f2
TL
551 if (i != sockets.end()) {
552 i->second.set_exception(std::system_error(ECONNABORTED, std::system_category()));
553 sockets.erase(i);
554 }
555}
556
9f95a23c
TL
557future<accept_result>
558posix_reuseport_server_socket_impl::accept() {
559 return _lfd.accept().then([allocator = _allocator, protocol = _protocol] (std::tuple<pollable_fd, socket_address> fd_sa) {
560 auto& fd = std::get<0>(fd_sa);
561 auto& sa = std::get<1>(fd_sa);
11fdf7f2 562 std::unique_ptr<connected_socket_impl> csi(
f67539c2 563 new posix_connected_socket_impl(sa.family(), protocol, std::move(fd), allocator));
9f95a23c
TL
564 return make_ready_future<accept_result>(
565 accept_result{connected_socket(std::move(csi)), sa});
11fdf7f2
TL
566 });
567}
568
11fdf7f2 569void
9f95a23c 570posix_reuseport_server_socket_impl::abort_accept() {
1e59de90 571 _lfd.shutdown(SHUT_RD, pollable_fd::shutdown_kernel_only::no);
11fdf7f2
TL
572}
573
9f95a23c
TL
574socket_address posix_reuseport_server_socket_impl::local_address() const {
575 return _lfd.get_file_desc().get_address();
576}
577
11fdf7f2 578void
f67539c2 579posix_ap_server_socket_impl::move_connected_socket(int protocol, socket_address sa, pollable_fd fd, socket_address addr, conntrack::handle cth, std::pmr::polymorphic_allocator<char>* allocator) {
9f95a23c
TL
580 auto t_sa = std::make_tuple(protocol, sa);
581 auto i = sockets.find(t_sa);
11fdf7f2
TL
582 if (i != sockets.end()) {
583 try {
f67539c2 584 std::unique_ptr<connected_socket_impl> csi(new posix_connected_socket_impl(sa.family(), protocol, std::move(fd), std::move(cth), allocator));
9f95a23c 585 i->second.set_value(accept_result{connected_socket(std::move(csi)), std::move(addr)});
11fdf7f2
TL
586 } catch (...) {
587 i->second.set_exception(std::current_exception());
588 }
589 sockets.erase(i);
590 } else {
9f95a23c 591 conn_q.emplace(std::piecewise_construct, std::make_tuple(t_sa), std::make_tuple(std::move(fd), std::move(addr), std::move(cth)));
11fdf7f2
TL
592 }
593}
594
595future<temporary_buffer<char>>
596posix_data_source_impl::get() {
1e59de90 597 return _fd.recv_some(static_cast<internal::buffer_allocator*>(this)).then([this] (temporary_buffer<char> b) {
f67539c2
TL
598 if (b.size() >= _config.buffer_size) {
599 _config.buffer_size *= 2;
600 _config.buffer_size = std::min(_config.buffer_size, _config.max_buffer_size);
601 } else if (b.size() <= _config.buffer_size / 4) {
602 _config.buffer_size /= 2;
603 _config.buffer_size = std::max(_config.buffer_size, _config.min_buffer_size);
604 }
605 return b;
11fdf7f2
TL
606 });
607}
608
f67539c2
TL
609temporary_buffer<char>
610posix_data_source_impl::allocate_buffer() {
611 return make_temporary_buffer<char>(_buffer_allocator, _config.buffer_size);
612}
613
11fdf7f2 614future<> posix_data_source_impl::close() {
f67539c2 615 _fd.shutdown(SHUT_RD);
11fdf7f2
TL
616 return make_ready_future<>();
617}
618
619std::vector<struct iovec> to_iovec(const packet& p) {
620 std::vector<struct iovec> v;
621 v.reserve(p.nr_frags());
622 for (auto&& f : p.fragments()) {
623 v.push_back({.iov_base = f.base, .iov_len = f.size});
624 }
625 return v;
626}
627
628std::vector<iovec> to_iovec(std::vector<temporary_buffer<char>>& buf_vec) {
629 std::vector<iovec> v;
630 v.reserve(buf_vec.size());
631 for (auto& buf : buf_vec) {
632 v.push_back({.iov_base = buf.get_write(), .iov_len = buf.size()});
633 }
634 return v;
635}
636
637future<>
638posix_data_sink_impl::put(temporary_buffer<char> buf) {
f67539c2 639 return _fd.write_all(buf.get(), buf.size()).then([d = buf.release()] {});
11fdf7f2
TL
640}
641
642future<>
643posix_data_sink_impl::put(packet p) {
644 _p = std::move(p);
f67539c2 645 return _fd.write_all(_p).then([this] { _p.reset(); });
11fdf7f2
TL
646}
647
648future<>
649posix_data_sink_impl::close() {
f67539c2 650 _fd.shutdown(SHUT_WR);
11fdf7f2
TL
651 return make_ready_future<>();
652}
653
20effc67 654posix_network_stack::posix_network_stack(const program_options::option_group& opts, std::pmr::polymorphic_allocator<char>* allocator)
f67539c2
TL
655 : _reuseport(engine().posix_reuseport_available()), _allocator(allocator) {
656}
657
11fdf7f2
TL
658server_socket
659posix_network_stack::listen(socket_address sa, listen_options opt) {
f67539c2 660 using server_socket = seastar::server_socket;
9f95a23c
TL
661 // allow unspecified bind address -> default to ipv4 wildcard
662 if (sa.is_unspecified()) {
663 sa = inet_address(inet_address::family::INET);
664 }
665 if (sa.is_af_unix()) {
666 return server_socket(std::make_unique<posix_server_socket_impl>(0, sa, engine().posix_listen(sa, opt), opt.lba, opt.fixed_cpu, _allocator));
11fdf7f2 667 }
9f95a23c
TL
668 auto protocol = static_cast<int>(opt.proto);
669 return _reuseport ?
670 server_socket(std::make_unique<posix_reuseport_server_socket_impl>(protocol, sa, engine().posix_listen(sa, opt), _allocator))
671 :
672 server_socket(std::make_unique<posix_server_socket_impl>(protocol, sa, engine().posix_listen(sa, opt), opt.lba, opt.fixed_cpu, _allocator));
11fdf7f2
TL
673}
674
675::seastar::socket posix_network_stack::socket() {
9f95a23c 676 return ::seastar::socket(std::make_unique<posix_socket_impl>(_allocator));
11fdf7f2
TL
677}
678
20effc67
TL
679posix_ap_network_stack::posix_ap_network_stack(const program_options::option_group& opts, std::pmr::polymorphic_allocator<char>* allocator)
680 : posix_network_stack(opts, allocator), _reuseport(engine().posix_reuseport_available()) {
f67539c2
TL
681}
682
11fdf7f2
TL
683server_socket
684posix_ap_network_stack::listen(socket_address sa, listen_options opt) {
f67539c2 685 using server_socket = seastar::server_socket;
9f95a23c
TL
686 // allow unspecified bind address -> default to ipv4 wildcard
687 if (sa.is_unspecified()) {
688 sa = inet_address(inet_address::family::INET);
689 }
690 if (sa.is_af_unix()) {
691 return server_socket(std::make_unique<posix_ap_server_socket_impl>(0, sa, _allocator));
11fdf7f2 692 }
9f95a23c
TL
693 auto protocol = static_cast<int>(opt.proto);
694 return _reuseport ?
695 server_socket(std::make_unique<posix_reuseport_server_socket_impl>(protocol, sa, engine().posix_listen(sa, opt), _allocator))
696 :
697 server_socket(std::make_unique<posix_ap_server_socket_impl>(protocol, sa, _allocator));
11fdf7f2
TL
698}
699
700struct cmsg_with_pktinfo {
701 struct cmsghdrcmh;
9f95a23c
TL
702 union {
703 struct in_pktinfo pktinfo;
704 struct in6_pktinfo pkt6info;
705 };
11fdf7f2
TL
706};
707
708class posix_udp_channel : public udp_channel_impl {
709private:
710 static constexpr int MAX_DATAGRAM_SIZE = 65507;
711 struct recv_ctx {
712 struct msghdr _hdr;
713 struct iovec _iov;
714 socket_address _src_addr;
715 char* _buffer;
716 cmsg_with_pktinfo _cmsg;
717
718 recv_ctx() {
719 memset(&_hdr, 0, sizeof(_hdr));
720 _hdr.msg_iov = &_iov;
721 _hdr.msg_iovlen = 1;
722 _hdr.msg_name = &_src_addr.u.sa;
723 _hdr.msg_namelen = sizeof(_src_addr.u.sas);
724 memset(&_cmsg, 0, sizeof(_cmsg));
725 _hdr.msg_control = &_cmsg;
726 _hdr.msg_controllen = sizeof(_cmsg);
727 }
728
729 void prepare() {
730 _buffer = new char[MAX_DATAGRAM_SIZE];
731 _iov.iov_base = _buffer;
732 _iov.iov_len = MAX_DATAGRAM_SIZE;
733 }
734 };
735 struct send_ctx {
736 struct msghdr _hdr;
737 std::vector<struct iovec> _iovecs;
738 socket_address _dst;
739 packet _p;
740
741 send_ctx() {
742 memset(&_hdr, 0, sizeof(_hdr));
743 _hdr.msg_name = &_dst.u.sa;
f67539c2 744 _hdr.msg_namelen = _dst.addr_length;
11fdf7f2
TL
745 }
746
9f95a23c
TL
747 void prepare(const socket_address& dst, packet p) {
748 _dst = dst;
f67539c2 749 _hdr.msg_namelen = _dst.addr_length;
11fdf7f2
TL
750 _p = std::move(p);
751 _iovecs = to_iovec(_p);
752 _hdr.msg_iov = _iovecs.data();
753 _hdr.msg_iovlen = _iovecs.size();
9f95a23c 754 resolve_outgoing_address(_dst);
11fdf7f2
TL
755 }
756 };
f67539c2 757 pollable_fd _fd;
9f95a23c 758 socket_address _address;
11fdf7f2
TL
759 recv_ctx _recv;
760 send_ctx _send;
761 bool _closed;
762public:
9f95a23c 763 posix_udp_channel(const socket_address& bind_address)
11fdf7f2 764 : _closed(false) {
9f95a23c 765 auto sa = bind_address.is_unspecified() ? socket_address(inet_address(inet_address::family::INET)) : bind_address;
11fdf7f2
TL
766 file_desc fd = file_desc::socket(sa.u.sa.sa_family, SOCK_DGRAM | SOCK_NONBLOCK | SOCK_CLOEXEC, 0);
767 fd.setsockopt(SOL_IP, IP_PKTINFO, true);
768 if (engine().posix_reuseport_available()) {
769 fd.setsockopt(SOL_SOCKET, SO_REUSEPORT, 1);
770 }
771 fd.bind(sa.u.sa, sizeof(sa.u.sas));
9f95a23c 772 _address = fd.get_address();
f67539c2 773 _fd = std::move(fd);
11fdf7f2
TL
774 }
775 virtual ~posix_udp_channel() { if (!_closed) close(); };
776 virtual future<udp_datagram> receive() override;
9f95a23c
TL
777 virtual future<> send(const socket_address& dst, const char *msg) override;
778 virtual future<> send(const socket_address& dst, packet p) override;
11fdf7f2 779 virtual void shutdown_input() override {
1e59de90 780 _fd.shutdown(SHUT_RD, pollable_fd::shutdown_kernel_only::no);
11fdf7f2
TL
781 }
782 virtual void shutdown_output() override {
1e59de90 783 _fd.shutdown(SHUT_WR, pollable_fd::shutdown_kernel_only::no);
11fdf7f2
TL
784 }
785 virtual void close() override {
786 _closed = true;
f67539c2 787 _fd = {};
11fdf7f2
TL
788 }
789 virtual bool is_closed() const override { return _closed; }
9f95a23c
TL
790 socket_address local_address() const override {
791 assert(_address.u.sas.ss_family != AF_INET6 || (_address.addr_length > 20));
792 return _address;
793 }
11fdf7f2
TL
794};
795
9f95a23c 796future<> posix_udp_channel::send(const socket_address& dst, const char *message) {
11fdf7f2 797 auto len = strlen(message);
9f95a23c
TL
798 auto a = dst;
799 resolve_outgoing_address(a);
f67539c2 800 return _fd.sendto(a, message, len)
11fdf7f2
TL
801 .then([len] (size_t size) { assert(size == len); });
802}
803
9f95a23c 804future<> posix_udp_channel::send(const socket_address& dst, packet p) {
11fdf7f2
TL
805 auto len = p.len();
806 _send.prepare(dst, std::move(p));
f67539c2 807 return _fd.sendmsg(&_send._hdr)
11fdf7f2
TL
808 .then([len] (size_t size) { assert(size == len); });
809}
810
811udp_channel
9f95a23c 812posix_network_stack::make_udp_channel(const socket_address& addr) {
11fdf7f2
TL
813 return udp_channel(std::make_unique<posix_udp_channel>(addr));
814}
815
9f95a23c
TL
816bool
817posix_network_stack::supports_ipv6() const {
818 static bool has_ipv6 = [] {
819 try {
820 posix_udp_channel c(ipv6_addr{"::1"});
821 c.close();
822 return true;
823 } catch (...) {}
824 return false;
825 }();
826
827 return has_ipv6;
828}
829
11fdf7f2
TL
830class posix_datagram : public udp_datagram_impl {
831private:
9f95a23c
TL
832 socket_address _src;
833 socket_address _dst;
11fdf7f2
TL
834 packet _p;
835public:
9f95a23c
TL
836 posix_datagram(const socket_address& src, const socket_address& dst, packet p) : _src(src), _dst(dst), _p(std::move(p)) {}
837 virtual socket_address get_src() override { return _src; }
838 virtual socket_address get_dst() override { return _dst; }
839 virtual uint16_t get_dst_port() override { return _dst.port(); }
11fdf7f2
TL
840 virtual packet& get_data() override { return _p; }
841};
842
843future<udp_datagram>
844posix_udp_channel::receive() {
845 _recv.prepare();
f67539c2 846 return _fd.recvmsg(&_recv._hdr).then([this] (size_t size) {
9f95a23c
TL
847 socket_address dst;
848 for (auto* cmsg = CMSG_FIRSTHDR(&_recv._hdr); cmsg != nullptr; cmsg = CMSG_NXTHDR(&_recv._hdr, cmsg)) {
849 if (cmsg->cmsg_level == IPPROTO_IP && cmsg->cmsg_type == IP_PKTINFO) {
f67539c2 850 dst = ipv4_addr(copy_reinterpret_cast<in_pktinfo>(CMSG_DATA(cmsg)).ipi_addr, _address.port());
9f95a23c
TL
851 break;
852 } else if (cmsg->cmsg_level == IPPROTO_IPV6 && cmsg->cmsg_type == IPV6_PKTINFO) {
f67539c2 853 dst = ipv6_addr(copy_reinterpret_cast<in6_pktinfo>(CMSG_DATA(cmsg)).ipi6_addr, _address.port());
9f95a23c
TL
854 break;
855 }
856 }
11fdf7f2
TL
857 return make_ready_future<udp_datagram>(udp_datagram(std::make_unique<posix_datagram>(
858 _recv._src_addr, dst, packet(fragment{_recv._buffer, size}, make_deleter([buf = _recv._buffer] { delete[] buf; })))));
859 }).handle_exception([p = _recv._buffer](auto ep) {
860 delete[] p;
861 return make_exception_future<udp_datagram>(std::move(ep));
862 });
863}
864
20effc67
TL
865network_stack_entry register_posix_stack() {
866 return network_stack_entry{
867 "posix", std::make_unique<program_options::option_group>(nullptr, "Posix"),
868 [](const program_options::option_group& ops) {
11fdf7f2
TL
869 return smp::main_thread() ? posix_network_stack::create(ops)
870 : posix_ap_network_stack::create(ops);
871 },
20effc67 872 true};
11fdf7f2 873}
9f95a23c
TL
874
875// nw interface stuff
876
877std::vector<network_interface> posix_network_stack::network_interfaces() {
878 class posix_network_interface_impl final : public network_interface_impl {
879 public:
880 uint32_t _index = 0, _mtu = 0;
881 sstring _name, _display_name;
882 std::vector<net::inet_address> _addresses;
883 std::vector<uint8_t> _hardware_address;
884 bool _loopback = false, _virtual = false, _up = false;
885
886 uint32_t index() const override {
887 return _index;
888 }
889 uint32_t mtu() const override {
890 return _mtu;
891 }
892 const sstring& name() const override {
893 return _name;
894 }
895 const sstring& display_name() const override {
896 return _display_name.empty() ? name() : _display_name;
897 }
898 const std::vector<net::inet_address>& addresses() const override {
899 return _addresses;
900 }
901 const std::vector<uint8_t> hardware_address() const override {
902 return _hardware_address;
903 }
904 bool is_loopback() const override {
905 return _loopback;
906 }
907 bool is_virtual() const override {
908 return _virtual;
909 }
910 bool is_up() const override {
911 // TODO: should be checked on query?
912 return _up;
913 }
914 bool supports_ipv6() const override {
915 // TODO: this is not 100% correct.
916 return std::any_of(_addresses.begin(), _addresses.end(), std::mem_fn(&inet_address::is_ipv6));
917 }
918 };
919
920 // For now, keep an immutable set of interfaces created on start, shared across
921 // shards
922 static const std::vector<posix_network_interface_impl> global_interfaces = [] {
923 auto fd = ::socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
924 throw_system_error_on(fd < 0, "could not open netlink socket");
925
926 std::unique_ptr<int, void(*)(int*)> fd_guard(&fd, [](int* p) { ::close(*p); });
927
928 auto pid = ::getpid();
929
930 sockaddr_nl local = { 0, };
931 local.nl_family = AF_NETLINK;
932 local.nl_pid = pid;
933 local.nl_groups = RTMGRP_IPV6_IFADDR|RTMGRP_IPV4_IFADDR;
934
935 throw_system_error_on(bind(fd, (struct sockaddr *) &local, sizeof(local)) < 0, "could not bind netlink socket");
936
937 /* RTNL socket is ready for use, prepare and send requests */
938
939 std::vector<posix_network_interface_impl> res;
940
941 for (auto msg : { RTM_GETLINK, RTM_GETADDR}) {
942 struct nl_req {
943 nlmsghdr hdr;
944 union {
945 rtgenmsg gen;
946 ifaddrmsg addr;
947 };
948 } req = { {0}, };
949
950 sockaddr_nl kernel = { 0, };
951 msghdr rtnl_msg = { 0, };
952
953 kernel.nl_family = AF_NETLINK; /* fill-in kernel address (destination of our message) */
954
955 req.hdr.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg));
956 req.hdr.nlmsg_type = msg;
957 req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_ROOT;
958 req.hdr.nlmsg_seq = 1;
959 req.hdr.nlmsg_pid = pid;
960
961 if (msg == RTM_GETLINK) {
962 req.gen.rtgen_family = AF_PACKET; /* no preferred AF, we will get *all* interfaces */
963 } else {
964 req.addr.ifa_family = AF_UNSPEC;
965 }
966
967 iovec io;
968
969 io.iov_base = &req;
970 io.iov_len = req.hdr.nlmsg_len;
971
972 rtnl_msg.msg_iov = &io;
973 rtnl_msg.msg_iovlen = 1;
974 rtnl_msg.msg_name = &kernel;
975 rtnl_msg.msg_namelen = sizeof(kernel);
976
977 throw_system_error_on(::sendmsg(fd, (struct msghdr *) &rtnl_msg, 0) < 0, "could not send netlink request");
978 /* parse reply */
979
980 constexpr size_t reply_buffer_size = 8192;
981 char reply[reply_buffer_size];
982
983 bool done = false;
984
985 while (!done) {
986 msghdr rtnl_reply = { 0, };
987 iovec io_reply = { 0, };
988
989 io_reply.iov_base = reply;
990 io_reply.iov_len = reply_buffer_size;
991 rtnl_reply.msg_iov = &io_reply;
992 rtnl_reply.msg_iovlen = 1;
993 rtnl_reply.msg_name = &kernel;
994 rtnl_reply.msg_namelen = sizeof(kernel);
995
996 auto len = ::recvmsg(fd, &rtnl_reply, 0); /* read as much data as fits in the receive buffer */
997 if (len <= 0) {
998 return res;
999 }
1000
1001 for (auto* msg_ptr = (struct nlmsghdr *) reply; NLMSG_OK(msg_ptr, len); msg_ptr = NLMSG_NEXT(msg_ptr, len)) {
1002 switch(msg_ptr->nlmsg_type) {
1003 case NLMSG_DONE: // that is all
1004 done = true;
1005 break;
1006 case RTM_NEWLINK:
1007 {
1008 auto* iface = reinterpret_cast<const ifinfomsg*>(NLMSG_DATA(msg_ptr));
1009 auto ilen = msg_ptr->nlmsg_len - NLMSG_LENGTH(sizeof(ifinfomsg));
1010
1011 // todo: filter any non-network interfaces (family)
1012
1013 posix_network_interface_impl nwif;
1014
1015 nwif._index = iface->ifi_index;
1016 nwif._loopback = (iface->ifi_flags & IFF_LOOPBACK) != 0;
1017 nwif._up = (iface->ifi_flags & IFF_UP) != 0;
1018 #if defined(IFF_802_1Q_VLAN) && defined(IFF_EBRIDGE) && defined(IFF_SLAVE_INACTIVE)
1019 nwif._virtual = (iface->ifi_flags & (IFF_802_1Q_VLAN|IFF_EBRIDGE|IFF_SLAVE_INACTIVE)) != 0;
1020 #endif
1021 for (auto* attribute = IFLA_RTA(iface); RTA_OK(attribute, ilen); attribute = RTA_NEXT(attribute, ilen)) {
1022 switch(attribute->rta_type) {
1023 case IFLA_IFNAME:
1024 nwif._name = reinterpret_cast<const char *>(RTA_DATA(attribute));
1025 break;
1026 case IFLA_MTU:
1027 nwif._mtu = *reinterpret_cast<const uint32_t *>(RTA_DATA(attribute));
1028 break;
1029 case IFLA_ADDRESS:
1030 nwif._hardware_address.assign(reinterpret_cast<const uint8_t *>(RTA_DATA(attribute)), reinterpret_cast<const uint8_t *>(RTA_DATA(attribute)) + RTA_PAYLOAD(attribute));
1031 break;
1032 default:
1033 break;
1034 }
1035 }
1036
1037 res.emplace_back(std::move(nwif));
1038
1039 break;
1040 }
1041 case RTM_NEWADDR:
1042 {
1043 auto* addr = reinterpret_cast<const ifaddrmsg*>(NLMSG_DATA(msg_ptr));
1044 auto ilen = msg_ptr->nlmsg_len - NLMSG_LENGTH(sizeof(ifaddrmsg));
1045
1046 for (auto& nwif : res) {
1047 if (nwif._index == addr->ifa_index) {
1048 for (auto* attribute = IFA_RTA(addr); RTA_OK(attribute, ilen); attribute = RTA_NEXT(attribute, ilen)) {
f67539c2 1049 std::optional<inet_address> ia;
9f95a23c
TL
1050
1051 switch(attribute->rta_type) {
1052 case IFA_LOCAL:
1053 case IFA_ADDRESS: // ipv6 addresses are reported only as "ADDRESS"
1054
1055 if (RTA_PAYLOAD(attribute) == sizeof(::in_addr)) {
1056 ia.emplace(*reinterpret_cast<const ::in_addr *>(RTA_DATA(attribute)));
1057 } else if (RTA_PAYLOAD(attribute) == sizeof(::in6_addr)) {
1058 ia.emplace(*reinterpret_cast<const ::in6_addr *>(RTA_DATA(attribute)), nwif.index());
1059 }
1060
1061 if (ia && std::find(nwif._addresses.begin(), nwif._addresses.end(), *ia) == nwif._addresses.end()) {
1062 nwif._addresses.emplace_back(*ia);
1063 }
1064
1065 break;
1066 default:
1067 break;
1068 }
1069 }
1070
1071 break;
1072 }
1073 }
1074 }
1075 default:
1076 break;
1077 }
1078 }
1079 }
1080 }
1081
1082 return res;
1083 }();
1084
1085 // And a similarly immutable set of shared_ptr to network_interface_impl per shard, ready
1086 // to be handed out to callers with minimal overhead
1087 static const thread_local std::vector<shared_ptr<posix_network_interface_impl>> thread_local_interfaces = [] {
1088 std::vector<shared_ptr<posix_network_interface_impl>> res;
1089 res.reserve(global_interfaces.size());
1090 std::transform(global_interfaces.begin(), global_interfaces.end(), std::back_inserter(res), [](const posix_network_interface_impl& impl) {
1091 return make_shared<posix_network_interface_impl>(impl);
1092 });
1093 return res;
1094 }();
1095
1096 return std::vector<network_interface>(thread_local_interfaces.begin(), thread_local_interfaces.end());
1097}
1098
11fdf7f2
TL
1099}
1100
1101}