]>
Commit | Line | Data |
---|---|---|
11fdf7f2 TL |
1 | /* |
2 | * This file is open source software, licensed to you under the terms | |
3 | * of the Apache License, Version 2.0 (the "License"). See the NOTICE file | |
4 | * distributed with this work for additional information regarding copyright | |
5 | * ownership. You may not use this file except in compliance with the License. | |
6 | * | |
7 | * You may obtain a copy of the License at | |
8 | * | |
9 | * http://www.apache.org/licenses/LICENSE-2.0 | |
10 | * | |
11 | * Unless required by applicable law or agreed to in writing, | |
12 | * software distributed under the License is distributed on an | |
13 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
14 | * KIND, either express or implied. See the License for the | |
15 | * specific language governing permissions and limitations | |
16 | * under the License. | |
17 | */ | |
18 | /* | |
19 | * Copyright (C) 2014 Cloudius Systems, Ltd. | |
20 | */ | |
21 | ||
22 | #pragma once | |
23 | ||
24 | #include <memory> | |
25 | #include <vector> | |
26 | #include <cstring> | |
27 | #include <seastar/core/future.hh> | |
28 | #include <seastar/net/byteorder.hh> | |
29 | #include <seastar/net/socket_defs.hh> | |
30 | #include <seastar/net/packet.hh> | |
1e59de90 | 31 | #include <seastar/core/internal/api-level.hh> |
11fdf7f2 TL |
32 | #include <seastar/core/temporary_buffer.hh> |
33 | #include <seastar/core/iostream.hh> | |
34 | #include <seastar/util/std-compat.hh> | |
20effc67 | 35 | #include <seastar/util/program-options.hh> |
11fdf7f2 TL |
36 | #include <sys/types.h> |
37 | ||
38 | namespace seastar { | |
39 | ||
9f95a23c | 40 | inline |
f67539c2 | 41 | bool is_ip_unspecified(const ipv4_addr& addr) noexcept { |
9f95a23c | 42 | return addr.is_ip_unspecified(); |
11fdf7f2 TL |
43 | } |
44 | ||
9f95a23c | 45 | inline |
f67539c2 | 46 | bool is_port_unspecified(const ipv4_addr& addr) noexcept { |
9f95a23c | 47 | return addr.is_port_unspecified(); |
11fdf7f2 TL |
48 | } |
49 | ||
9f95a23c | 50 | inline |
f67539c2 | 51 | socket_address make_ipv4_address(const ipv4_addr& addr) noexcept { |
9f95a23c | 52 | return socket_address(addr); |
11fdf7f2 TL |
53 | } |
54 | ||
55 | inline | |
f67539c2 | 56 | socket_address make_ipv4_address(uint32_t ip, uint16_t port) noexcept { |
9f95a23c | 57 | return make_ipv4_address(ipv4_addr(ip, port)); |
11fdf7f2 TL |
58 | } |
59 | ||
60 | namespace net { | |
61 | ||
62 | // see linux tcp(7) for parameter explanation | |
63 | struct tcp_keepalive_params { | |
64 | std::chrono::seconds idle; // TCP_KEEPIDLE | |
65 | std::chrono::seconds interval; // TCP_KEEPINTVL | |
66 | unsigned count; // TCP_KEEPCNT | |
67 | }; | |
68 | ||
69 | // see linux sctp(7) for parameter explanation | |
70 | struct sctp_keepalive_params { | |
71 | std::chrono::seconds interval; // spp_hbinterval | |
72 | unsigned count; // spp_pathmaxrt | |
73 | }; | |
74 | ||
f67539c2 | 75 | using keepalive_params = std::variant<tcp_keepalive_params, sctp_keepalive_params>; |
11fdf7f2 TL |
76 | |
77 | /// \cond internal | |
78 | class connected_socket_impl; | |
79 | class socket_impl; | |
9f95a23c | 80 | |
f67539c2 | 81 | class server_socket_impl; |
11fdf7f2 TL |
82 | class udp_channel_impl; |
83 | class get_impl; | |
84 | /// \endcond | |
85 | ||
86 | class udp_datagram_impl { | |
87 | public: | |
88 | virtual ~udp_datagram_impl() {}; | |
9f95a23c TL |
89 | virtual socket_address get_src() = 0; |
90 | virtual socket_address get_dst() = 0; | |
11fdf7f2 TL |
91 | virtual uint16_t get_dst_port() = 0; |
92 | virtual packet& get_data() = 0; | |
93 | }; | |
94 | ||
95 | class udp_datagram final { | |
96 | private: | |
97 | std::unique_ptr<udp_datagram_impl> _impl; | |
98 | public: | |
f67539c2 | 99 | udp_datagram(std::unique_ptr<udp_datagram_impl>&& impl) noexcept : _impl(std::move(impl)) {}; |
9f95a23c TL |
100 | socket_address get_src() { return _impl->get_src(); } |
101 | socket_address get_dst() { return _impl->get_dst(); } | |
11fdf7f2 TL |
102 | uint16_t get_dst_port() { return _impl->get_dst_port(); } |
103 | packet& get_data() { return _impl->get_data(); } | |
104 | }; | |
105 | ||
106 | class udp_channel { | |
107 | private: | |
108 | std::unique_ptr<udp_channel_impl> _impl; | |
109 | public: | |
f67539c2 TL |
110 | udp_channel() noexcept; |
111 | udp_channel(std::unique_ptr<udp_channel_impl>) noexcept; | |
11fdf7f2 TL |
112 | ~udp_channel(); |
113 | ||
f67539c2 TL |
114 | udp_channel(udp_channel&&) noexcept; |
115 | udp_channel& operator=(udp_channel&&) noexcept; | |
11fdf7f2 | 116 | |
9f95a23c TL |
117 | socket_address local_address() const; |
118 | ||
11fdf7f2 | 119 | future<udp_datagram> receive(); |
9f95a23c TL |
120 | future<> send(const socket_address& dst, const char* msg); |
121 | future<> send(const socket_address& dst, packet p); | |
11fdf7f2 TL |
122 | bool is_closed() const; |
123 | /// Causes a pending receive() to complete (possibly with an exception) | |
124 | void shutdown_input(); | |
125 | /// Causes a pending send() to complete (possibly with an exception) | |
126 | void shutdown_output(); | |
127 | /// Closes the channel and releases all resources. | |
128 | /// | |
129 | /// Must be called only when there are no unfinished send() or receive() calls. You | |
130 | /// can force pending calls to complete soon by calling shutdown_input() and | |
131 | /// shutdown_output(). | |
132 | void close(); | |
133 | }; | |
134 | ||
9f95a23c TL |
135 | class network_interface_impl; |
136 | ||
11fdf7f2 TL |
137 | } /* namespace net */ |
138 | ||
139 | /// \addtogroup networking-module | |
140 | /// @{ | |
141 | ||
f67539c2 TL |
142 | /// Configuration for buffered connected_socket input operations |
143 | /// | |
144 | /// This structure allows tuning of buffered input operations done via | |
145 | /// connected_socket. It is a hint to the implementation and may be | |
146 | /// ignored (e.g. the zero-copy native stack does not allocate buffers, | |
147 | /// so it ignores buffer-size parameters). | |
148 | struct connected_socket_input_stream_config final { | |
149 | /// Initial buffer size to use for input buffering | |
150 | unsigned buffer_size = 8192; | |
151 | /// Minimum buffer size to use for input buffering. The system will decrease | |
152 | /// buffer sizes if it sees a tendency towards small requests, but will not go | |
153 | /// below this buffer size. | |
154 | unsigned min_buffer_size = 512; | |
155 | /// Maximum buffer size to use for input buffering. The system will increase | |
156 | /// buffer sizes if it sees a tendency towards large requests, but will not go | |
157 | /// above this buffer size. | |
158 | unsigned max_buffer_size = 128 * 1024; | |
159 | }; | |
160 | ||
1e59de90 TL |
161 | /// Distinguished name |
162 | struct session_dn { | |
163 | sstring subject; | |
164 | sstring issuer; | |
165 | }; | |
166 | ||
11fdf7f2 TL |
167 | /// A TCP (or other stream-based protocol) connection. |
168 | /// | |
169 | /// A \c connected_socket represents a full-duplex stream between | |
170 | /// two endpoints, a local endpoint and a remote endpoint. | |
171 | class connected_socket { | |
172 | friend class net::get_impl; | |
173 | std::unique_ptr<net::connected_socket_impl> _csi; | |
174 | public: | |
175 | /// Constructs a \c connected_socket not corresponding to a connection | |
f67539c2 | 176 | connected_socket() noexcept; |
11fdf7f2 TL |
177 | ~connected_socket(); |
178 | ||
179 | /// \cond internal | |
f67539c2 | 180 | explicit connected_socket(std::unique_ptr<net::connected_socket_impl> csi) noexcept; |
11fdf7f2 TL |
181 | /// \endcond |
182 | /// Moves a \c connected_socket object. | |
183 | connected_socket(connected_socket&& cs) noexcept; | |
184 | /// Move-assigns a \c connected_socket object. | |
185 | connected_socket& operator=(connected_socket&& cs) noexcept; | |
186 | /// Gets the input stream. | |
187 | /// | |
f67539c2 TL |
188 | /// \param csisc Configuration for the input_stream returned |
189 | /// | |
11fdf7f2 | 190 | /// Gets an object returning data sent from the remote endpoint. |
f67539c2 | 191 | input_stream<char> input(connected_socket_input_stream_config csisc = {}); |
11fdf7f2 TL |
192 | /// Gets the output stream. |
193 | /// | |
194 | /// Gets an object that sends data to the remote endpoint. | |
195 | /// \param buffer_size how much data to buffer | |
196 | output_stream<char> output(size_t buffer_size = 8192); | |
197 | /// Sets the TCP_NODELAY option (disabling Nagle's algorithm) | |
198 | void set_nodelay(bool nodelay); | |
199 | /// Gets the TCP_NODELAY option (Nagle's algorithm) | |
200 | /// | |
201 | /// \return whether the nodelay option is enabled or not | |
202 | bool get_nodelay() const; | |
203 | /// Sets SO_KEEPALIVE option (enable keepalive timer on a socket) | |
204 | void set_keepalive(bool keepalive); | |
205 | /// Gets SO_KEEPALIVE option | |
206 | /// \return whether the keepalive option is enabled or not | |
207 | bool get_keepalive() const; | |
208 | /// Sets keepalive parameters (TCP or SCTP, see net::keepalive_params) | |
209 | void set_keepalive_parameters(const net::keepalive_params& p); | |
210 | /// Gets keepalive parameters (TCP or SCTP, see net::keepalive_params) | |
211 | net::keepalive_params get_keepalive_parameters() const; | |
f67539c2 TL |
212 | /// Sets custom socket options. Based on setsockopt function. |
213 | /// Linux users should refer to protocol-specific manuals | |
214 | /// to see available options, e.g. tcp(7), ip(7), etc. | |
215 | void set_sockopt(int level, int optname, const void* data, size_t len); | |
216 | /// Gets custom socket options. Based on getsockopt function. | |
217 | /// Linux users should refer to protocol-specific manuals | |
218 | /// to see available options, e.g. tcp(7), ip(7), etc. | |
219 | int get_sockopt(int level, int optname, void* data, size_t len) const; | |
20effc67 TL |
220 | /// Local address of the socket |
221 | socket_address local_address() const noexcept; | |
11fdf7f2 TL |
222 | |
223 | /// Disables output to the socket. | |
224 | /// | |
225 | /// Current or future writes that have not been successfully flushed | |
226 | /// will immediately fail with an error. This is useful to abort | |
227 | /// operations on a socket that is not making progress due to a | |
228 | /// peer failure. | |
229 | void shutdown_output(); | |
230 | /// Disables input from the socket. | |
231 | /// | |
232 | /// Current or future reads will immediately fail with an error. | |
233 | /// This is useful to abort operations on a socket that is not making | |
234 | /// progress due to a peer failure. | |
235 | void shutdown_input(); | |
1e59de90 TL |
236 | /// Check whether the \c connected_socket is initialized. |
237 | /// | |
238 | /// \return true if this \c connected_socket is initialized, | |
239 | /// false otherwise. | |
240 | /// | |
241 | /// \see connect(socket_address sa) | |
242 | /// \see connect(socket_address sa, socket_address local, transport proto) | |
243 | explicit operator bool() const noexcept { | |
244 | return static_cast<bool>(_csi); | |
245 | } | |
246 | /// Waits for the peer of this socket to disconnect | |
247 | /// | |
248 | /// \return future that resolves when the peer closes the connection or shuts it down | |
249 | /// for writing, or when \ref shutdown_input() is called on the local socket. | |
250 | /// | |
251 | /// Note that when the returned future resolves, for whatever reason, the socket | |
252 | /// may still be readable, so the caller may want to wait for both events | |
253 | /// -- this one and EOF from read. | |
254 | /// | |
255 | /// Calling it several times per socket is not allowed (undefined behavior) | |
256 | /// | |
257 | /// \see poll(2) about POLLRDHUP for more details | |
258 | future<> wait_input_shutdown(); | |
11fdf7f2 TL |
259 | }; |
260 | /// @} | |
261 | ||
262 | /// \addtogroup networking-module | |
263 | /// @{ | |
264 | ||
265 | /// The seastar socket. | |
266 | /// | |
267 | /// A \c socket that allows a connection to be established between | |
268 | /// two endpoints. | |
269 | class socket { | |
270 | std::unique_ptr<net::socket_impl> _si; | |
271 | public: | |
f67539c2 | 272 | socket() noexcept = default; |
11fdf7f2 TL |
273 | ~socket(); |
274 | ||
275 | /// \cond internal | |
f67539c2 | 276 | explicit socket(std::unique_ptr<net::socket_impl> si) noexcept; |
11fdf7f2 TL |
277 | /// \endcond |
278 | /// Moves a \c seastar::socket object. | |
279 | socket(socket&&) noexcept; | |
280 | /// Move-assigns a \c seastar::socket object. | |
281 | socket& operator=(socket&&) noexcept; | |
282 | ||
283 | /// Attempts to establish the connection. | |
284 | /// | |
285 | /// \return a \ref connected_socket representing the connection. | |
9f95a23c TL |
286 | future<connected_socket> connect(socket_address sa, socket_address local = {}, transport proto = transport::TCP); |
287 | ||
288 | /// Sets SO_REUSEADDR option (enable reuseaddr option on a socket) | |
289 | void set_reuseaddr(bool reuseaddr); | |
290 | /// Gets SO_REUSEADDR option | |
291 | /// \return whether the reuseaddr option is enabled or not | |
292 | bool get_reuseaddr() const; | |
11fdf7f2 TL |
293 | /// Stops any in-flight connection attempt. |
294 | /// | |
295 | /// Cancels the connection attempt if it's still in progress, and | |
296 | /// terminates the connection if it has already been established. | |
297 | void shutdown(); | |
298 | }; | |
299 | ||
300 | /// @} | |
301 | ||
302 | /// \addtogroup networking-module | |
303 | /// @{ | |
304 | ||
9f95a23c TL |
305 | /// The result of a server_socket::accept() call |
306 | struct accept_result { | |
307 | connected_socket connection; ///< The newly-accepted connection | |
308 | socket_address remote_address; ///< The address of the peer that connected to us | |
309 | }; | |
310 | ||
11fdf7f2 TL |
311 | /// A listening socket, waiting to accept incoming network connections. |
312 | class server_socket { | |
f67539c2 | 313 | std::unique_ptr<net::server_socket_impl> _ssi; |
11fdf7f2 TL |
314 | bool _aborted = false; |
315 | public: | |
316 | enum class load_balancing_algorithm { | |
317 | // This algorithm tries to distribute all connections equally between all shards. | |
318 | // It does this by sending new connections to the shard with the smallest number of connections. | |
319 | connection_distribution, | |
320 | // This algorithm distributes new connections based on the peer's TCP port. The destination shard | |
321 | // is calculated as the port number modulo the number of shards. This allows a client to connect | |
322 | // to a specific shard in a server, provided it knows how many shards the server has, by choosing | |
323 | // its source port number accordingly. | |
324 | port, | |
9f95a23c TL |
325 | // This algorithm distributes all new connections to listen_options::fixed_cpu shard only. |
326 | fixed, | |
11fdf7f2 TL |
327 | default_ = connection_distribution |
328 | }; | |
1e59de90 | 329 | /// Constructs a \c server_socket without being bound to any address |
f67539c2 | 330 | server_socket() noexcept; |
11fdf7f2 | 331 | /// \cond internal |
f67539c2 | 332 | explicit server_socket(std::unique_ptr<net::server_socket_impl> ssi) noexcept; |
11fdf7f2 TL |
333 | /// \endcond |
334 | /// Moves a \c server_socket object. | |
335 | server_socket(server_socket&& ss) noexcept; | |
336 | ~server_socket(); | |
337 | /// Move-assigns a \c server_socket object. | |
338 | server_socket& operator=(server_socket&& cs) noexcept; | |
339 | ||
340 | /// Accepts the next connection to successfully connect to this socket. | |
341 | /// | |
9f95a23c TL |
342 | /// \return an accept_result representing the connection and |
343 | /// the socket_address of the remote endpoint. | |
11fdf7f2 TL |
344 | /// |
345 | /// \see listen(socket_address sa) | |
346 | /// \see listen(socket_address sa, listen_options opts) | |
9f95a23c | 347 | future<accept_result> accept(); |
11fdf7f2 TL |
348 | |
349 | /// Stops any \ref accept() in progress. | |
350 | /// | |
351 | /// Current and future \ref accept() calls will terminate immediately | |
352 | /// with an error. | |
353 | void abort_accept(); | |
9f95a23c TL |
354 | |
355 | /// Local bound address | |
1e59de90 TL |
356 | /// |
357 | /// \return the local bound address if the \c server_socket is listening, | |
358 | /// an empty address constructed with \c socket_address() otherwise. | |
359 | /// | |
360 | /// \see listen(socket_address sa) | |
361 | /// \see listen(socket_address sa, listen_options opts) | |
f67539c2 | 362 | socket_address local_address() const noexcept; |
1e59de90 TL |
363 | |
364 | /// Check whether the \c server_socket is listening on any address. | |
365 | /// | |
366 | /// \return true if this \c server_socket is bound to an address, | |
367 | /// false if it is just created with the default constructor. | |
368 | /// | |
369 | /// \see listen(socket_address sa) | |
370 | /// \see listen(socket_address sa, listen_options opts) | |
371 | explicit operator bool() const noexcept { | |
372 | return static_cast<bool>(_ssi); | |
373 | } | |
11fdf7f2 | 374 | }; |
9f95a23c | 375 | |
11fdf7f2 TL |
376 | /// @} |
377 | ||
378 | struct listen_options { | |
379 | bool reuse_address = false; | |
380 | server_socket::load_balancing_algorithm lba = server_socket::load_balancing_algorithm::default_; | |
381 | transport proto = transport::TCP; | |
9f95a23c TL |
382 | int listen_backlog = 100; |
383 | unsigned fixed_cpu = 0u; | |
384 | void set_fixed_cpu(unsigned cpu) { | |
385 | lba = server_socket::load_balancing_algorithm::fixed; | |
386 | fixed_cpu = cpu; | |
387 | } | |
388 | }; | |
389 | ||
390 | class network_interface { | |
391 | private: | |
392 | shared_ptr<net::network_interface_impl> _impl; | |
393 | public: | |
f67539c2 TL |
394 | network_interface() = delete; |
395 | network_interface(shared_ptr<net::network_interface_impl>) noexcept; | |
396 | network_interface(network_interface&&) noexcept; | |
9f95a23c | 397 | |
f67539c2 | 398 | network_interface& operator=(network_interface&&) noexcept; |
9f95a23c TL |
399 | |
400 | uint32_t index() const; | |
401 | uint32_t mtu() const; | |
402 | ||
403 | const sstring& name() const; | |
404 | const sstring& display_name() const; | |
405 | const std::vector<net::inet_address>& addresses() const; | |
406 | const std::vector<uint8_t> hardware_address() const; | |
407 | ||
408 | bool is_loopback() const; | |
409 | bool is_virtual() const; | |
410 | bool is_up() const; | |
411 | bool supports_ipv6() const; | |
11fdf7f2 TL |
412 | }; |
413 | ||
414 | class network_stack { | |
415 | public: | |
416 | virtual ~network_stack() {} | |
417 | virtual server_socket listen(socket_address sa, listen_options opts) = 0; | |
418 | // FIXME: local parameter assumes ipv4 for now, fix when adding other AF | |
9f95a23c | 419 | future<connected_socket> connect(socket_address sa, socket_address = {}, transport proto = transport::TCP); |
11fdf7f2 | 420 | virtual ::seastar::socket socket() = 0; |
9f95a23c | 421 | virtual net::udp_channel make_udp_channel(const socket_address& = {}) = 0; |
11fdf7f2 TL |
422 | virtual future<> initialize() { |
423 | return make_ready_future(); | |
424 | } | |
425 | virtual bool has_per_core_namespace() = 0; | |
9f95a23c TL |
426 | // NOTE: this is not a correct query approach. |
427 | // This question should be per NIC, but we have no such | |
428 | // abstraction, so for now this is "stack-wide" | |
429 | virtual bool supports_ipv6() const { | |
430 | return false; | |
431 | } | |
432 | ||
433 | /** | |
434 | * Returns available network interfaces. This represents a | |
435 | * snapshot of interfaces available at call time, hence the | |
436 | * return by value. | |
437 | */ | |
438 | virtual std::vector<network_interface> network_interfaces(); | |
11fdf7f2 TL |
439 | }; |
440 | ||
20effc67 TL |
441 | struct network_stack_entry { |
442 | using factory_func = noncopyable_function<future<std::unique_ptr<network_stack>> (const program_options::option_group&)>; | |
443 | ||
444 | sstring name; | |
445 | std::unique_ptr<program_options::option_group> opts; | |
446 | factory_func factory; | |
447 | bool is_default; | |
448 | }; | |
449 | ||
11fdf7f2 | 450 | } |