]>
Commit | Line | Data |
---|---|---|
11fdf7f2 TL |
1 | /* |
2 | * This file is open source software, licensed to you under the terms | |
3 | * of the Apache License, Version 2.0 (the "License"). See the NOTICE file | |
4 | * distributed with this work for additional information regarding copyright | |
5 | * ownership. You may not use this file except in compliance with the License. | |
6 | * | |
7 | * You may obtain a copy of the License at | |
8 | * | |
9 | * http://www.apache.org/licenses/LICENSE-2.0 | |
10 | * | |
11 | * Unless required by applicable law or agreed to in writing, | |
12 | * software distributed under the License is distributed on an | |
13 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
14 | * KIND, either express or implied. See the License for the | |
15 | * specific language governing permissions and limitations | |
16 | * under the License. | |
17 | */ | |
18 | /* | |
19 | * Copyright (C) 2014 Cloudius Systems, Ltd. | |
20 | */ | |
21 | ||
22 | #pragma once | |
23 | ||
24 | #include <memory> | |
25 | #include <vector> | |
26 | #include <cstring> | |
27 | #include <seastar/core/future.hh> | |
28 | #include <seastar/net/byteorder.hh> | |
29 | #include <seastar/net/socket_defs.hh> | |
30 | #include <seastar/net/packet.hh> | |
11fdf7f2 TL |
31 | #include <seastar/core/temporary_buffer.hh> |
32 | #include <seastar/core/iostream.hh> | |
33 | #include <seastar/util/std-compat.hh> | |
9f95a23c | 34 | #include "../core/internal/api-level.hh" |
11fdf7f2 TL |
35 | #include <sys/types.h> |
36 | ||
37 | namespace seastar { | |
38 | ||
9f95a23c TL |
39 | inline |
40 | bool is_ip_unspecified(const ipv4_addr& addr) { | |
41 | return addr.is_ip_unspecified(); | |
11fdf7f2 TL |
42 | } |
43 | ||
9f95a23c TL |
44 | inline |
45 | bool is_port_unspecified(const ipv4_addr& addr) { | |
46 | return addr.is_port_unspecified(); | |
11fdf7f2 TL |
47 | } |
48 | ||
9f95a23c TL |
49 | inline |
50 | socket_address make_ipv4_address(const ipv4_addr& addr) { | |
51 | return socket_address(addr); | |
11fdf7f2 TL |
52 | } |
53 | ||
54 | inline | |
55 | socket_address make_ipv4_address(uint32_t ip, uint16_t port) { | |
9f95a23c | 56 | return make_ipv4_address(ipv4_addr(ip, port)); |
11fdf7f2 TL |
57 | } |
58 | ||
59 | namespace net { | |
60 | ||
/// TCP keepalive settings; see Linux tcp(7) for the meaning of each option.
struct tcp_keepalive_params {
    /// Value for TCP_KEEPIDLE.
    std::chrono::seconds idle;
    /// Value for TCP_KEEPINTVL.
    std::chrono::seconds interval;
    /// Value for TCP_KEEPCNT.
    unsigned count;
};
67 | ||
/// SCTP keepalive settings; see Linux sctp(7) for the meaning of each option.
struct sctp_keepalive_params {
    /// Value for spp_hbinterval.
    std::chrono::seconds interval;
    /// Value for spp_pathmaxrt.
    unsigned count;
};
73 | ||
74 | using keepalive_params = compat::variant<tcp_keepalive_params, sctp_keepalive_params>; | |
75 | ||
76 | /// \cond internal | |
77 | class connected_socket_impl; | |
78 | class socket_impl; | |
9f95a23c TL |
79 | |
80 | #if SEASTAR_API_LEVEL <= 1 | |
81 | ||
82 | SEASTAR_INCLUDE_API_V1 namespace api_v1 { class server_socket_impl; } | |
83 | ||
84 | #endif | |
85 | ||
86 | SEASTAR_INCLUDE_API_V2 namespace api_v2 { class server_socket_impl; } | |
11fdf7f2 TL |
87 | class udp_channel_impl; |
88 | class get_impl; | |
89 | /// \endcond | |
90 | ||
91 | class udp_datagram_impl { | |
92 | public: | |
93 | virtual ~udp_datagram_impl() {}; | |
9f95a23c TL |
94 | virtual socket_address get_src() = 0; |
95 | virtual socket_address get_dst() = 0; | |
11fdf7f2 TL |
96 | virtual uint16_t get_dst_port() = 0; |
97 | virtual packet& get_data() = 0; | |
98 | }; | |
99 | ||
100 | class udp_datagram final { | |
101 | private: | |
102 | std::unique_ptr<udp_datagram_impl> _impl; | |
103 | public: | |
104 | udp_datagram(std::unique_ptr<udp_datagram_impl>&& impl) : _impl(std::move(impl)) {}; | |
9f95a23c TL |
105 | socket_address get_src() { return _impl->get_src(); } |
106 | socket_address get_dst() { return _impl->get_dst(); } | |
11fdf7f2 TL |
107 | uint16_t get_dst_port() { return _impl->get_dst_port(); } |
108 | packet& get_data() { return _impl->get_data(); } | |
109 | }; | |
110 | ||
111 | class udp_channel { | |
112 | private: | |
113 | std::unique_ptr<udp_channel_impl> _impl; | |
114 | public: | |
115 | udp_channel(); | |
116 | udp_channel(std::unique_ptr<udp_channel_impl>); | |
117 | ~udp_channel(); | |
118 | ||
119 | udp_channel(udp_channel&&); | |
120 | udp_channel& operator=(udp_channel&&); | |
121 | ||
9f95a23c TL |
122 | socket_address local_address() const; |
123 | ||
11fdf7f2 | 124 | future<udp_datagram> receive(); |
9f95a23c TL |
125 | future<> send(const socket_address& dst, const char* msg); |
126 | future<> send(const socket_address& dst, packet p); | |
11fdf7f2 TL |
127 | bool is_closed() const; |
128 | /// Causes a pending receive() to complete (possibly with an exception) | |
129 | void shutdown_input(); | |
130 | /// Causes a pending send() to complete (possibly with an exception) | |
131 | void shutdown_output(); | |
132 | /// Close the channel and releases all resources. | |
133 | /// | |
134 | /// Must be called only when there are no unfinished send() or receive() calls. You | |
135 | /// can force pending calls to complete soon by calling shutdown_input() and | |
136 | /// shutdown_output(). | |
137 | void close(); | |
138 | }; | |
139 | ||
9f95a23c TL |
140 | class network_interface_impl; |
141 | ||
11fdf7f2 TL |
142 | } /* namespace net */ |
143 | ||
144 | /// \addtogroup networking-module | |
145 | /// @{ | |
146 | ||
147 | /// A TCP (or other stream-based protocol) connection. | |
148 | /// | |
149 | /// A \c connected_socket represents a full-duplex stream between | |
150 | /// two endpoints, a local endpoint and a remote endpoint. | |
151 | class connected_socket { | |
152 | friend class net::get_impl; | |
153 | std::unique_ptr<net::connected_socket_impl> _csi; | |
154 | public: | |
155 | /// Constructs a \c connected_socket not corresponding to a connection | |
156 | connected_socket(); | |
157 | ~connected_socket(); | |
158 | ||
159 | /// \cond internal | |
160 | explicit connected_socket(std::unique_ptr<net::connected_socket_impl> csi); | |
161 | /// \endcond | |
162 | /// Moves a \c connected_socket object. | |
163 | connected_socket(connected_socket&& cs) noexcept; | |
164 | /// Move-assigns a \c connected_socket object. | |
165 | connected_socket& operator=(connected_socket&& cs) noexcept; | |
166 | /// Gets the input stream. | |
167 | /// | |
168 | /// Gets an object returning data sent from the remote endpoint. | |
169 | input_stream<char> input(); | |
170 | /// Gets the output stream. | |
171 | /// | |
172 | /// Gets an object that sends data to the remote endpoint. | |
173 | /// \param buffer_size how much data to buffer | |
174 | output_stream<char> output(size_t buffer_size = 8192); | |
175 | /// Sets the TCP_NODELAY option (disabling Nagle's algorithm) | |
176 | void set_nodelay(bool nodelay); | |
177 | /// Gets the TCP_NODELAY option (Nagle's algorithm) | |
178 | /// | |
179 | /// \return whether the nodelay option is enabled or not | |
180 | bool get_nodelay() const; | |
181 | /// Sets SO_KEEPALIVE option (enable keepalive timer on a socket) | |
182 | void set_keepalive(bool keepalive); | |
183 | /// Gets O_KEEPALIVE option | |
184 | /// \return whether the keepalive option is enabled or not | |
185 | bool get_keepalive() const; | |
186 | /// Sets TCP keepalive parameters | |
187 | void set_keepalive_parameters(const net::keepalive_params& p); | |
188 | /// Get TCP keepalive parameters | |
189 | net::keepalive_params get_keepalive_parameters() const; | |
190 | ||
191 | /// Disables output to the socket. | |
192 | /// | |
193 | /// Current or future writes that have not been successfully flushed | |
194 | /// will immediately fail with an error. This is useful to abort | |
195 | /// operations on a socket that is not making progress due to a | |
196 | /// peer failure. | |
197 | void shutdown_output(); | |
198 | /// Disables input from the socket. | |
199 | /// | |
200 | /// Current or future reads will immediately fail with an error. | |
201 | /// This is useful to abort operations on a socket that is not making | |
202 | /// progress due to a peer failure. | |
203 | void shutdown_input(); | |
204 | }; | |
205 | /// @} | |
206 | ||
207 | /// \addtogroup networking-module | |
208 | /// @{ | |
209 | ||
210 | /// The seastar socket. | |
211 | /// | |
212 | /// A \c socket that allows a connection to be established between | |
213 | /// two endpoints. | |
214 | class socket { | |
215 | std::unique_ptr<net::socket_impl> _si; | |
216 | public: | |
217 | ~socket(); | |
218 | ||
219 | /// \cond internal | |
220 | explicit socket(std::unique_ptr<net::socket_impl> si); | |
221 | /// \endcond | |
222 | /// Moves a \c seastar::socket object. | |
223 | socket(socket&&) noexcept; | |
224 | /// Move-assigns a \c seastar::socket object. | |
225 | socket& operator=(socket&&) noexcept; | |
226 | ||
227 | /// Attempts to establish the connection. | |
228 | /// | |
229 | /// \return a \ref connected_socket representing the connection. | |
9f95a23c TL |
230 | future<connected_socket> connect(socket_address sa, socket_address local = {}, transport proto = transport::TCP); |
231 | ||
232 | /// Sets SO_REUSEADDR option (enable reuseaddr option on a socket) | |
233 | void set_reuseaddr(bool reuseaddr); | |
234 | /// Gets O_REUSEADDR option | |
235 | /// \return whether the reuseaddr option is enabled or not | |
236 | bool get_reuseaddr() const; | |
11fdf7f2 TL |
237 | /// Stops any in-flight connection attempt. |
238 | /// | |
239 | /// Cancels the connection attempt if it's still in progress, and | |
240 | /// terminates the connection if it has already been established. | |
241 | void shutdown(); | |
242 | }; | |
243 | ||
244 | /// @} | |
245 | ||
246 | /// \addtogroup networking-module | |
247 | /// @{ | |
248 | ||
9f95a23c TL |
249 | /// The result of an server_socket::accept() call |
250 | struct accept_result { | |
251 | connected_socket connection; ///< The newly-accepted connection | |
252 | socket_address remote_address; ///< The address of the peer that connected to us | |
253 | }; | |
254 | ||
255 | SEASTAR_INCLUDE_API_V2 namespace api_v2 { | |
256 | ||
11fdf7f2 TL |
257 | /// A listening socket, waiting to accept incoming network connections. |
258 | class server_socket { | |
9f95a23c | 259 | std::unique_ptr<net::api_v2::server_socket_impl> _ssi; |
11fdf7f2 TL |
260 | bool _aborted = false; |
261 | public: | |
262 | enum class load_balancing_algorithm { | |
263 | // This algorithm tries to distribute all connections equally between all shards. | |
264 | // It does this by sending new connections to a shard with smallest amount of connections. | |
265 | connection_distribution, | |
266 | // This algorithm distributes new connection based on peer's tcp port. Destination shard | |
267 | // is calculated as a port number modulo number of shards. This allows a client to connect | |
268 | // to a specific shard in a server given it knows how many shards server has by choosing | |
269 | // src port number accordingly. | |
270 | port, | |
9f95a23c TL |
271 | // This algorithm distributes all new connections to listen_options::fixed_cpu shard only. |
272 | fixed, | |
11fdf7f2 TL |
273 | default_ = connection_distribution |
274 | }; | |
275 | /// Constructs a \c server_socket not corresponding to a connection | |
276 | server_socket(); | |
277 | /// \cond internal | |
9f95a23c | 278 | explicit server_socket(std::unique_ptr<net::api_v2::server_socket_impl> ssi); |
11fdf7f2 TL |
279 | /// \endcond |
280 | /// Moves a \c server_socket object. | |
281 | server_socket(server_socket&& ss) noexcept; | |
282 | ~server_socket(); | |
283 | /// Move-assigns a \c server_socket object. | |
284 | server_socket& operator=(server_socket&& cs) noexcept; | |
285 | ||
286 | /// Accepts the next connection to successfully connect to this socket. | |
287 | /// | |
9f95a23c TL |
288 | /// \return an accept_result representing the connection and |
289 | /// the socket_address of the remote endpoint. | |
11fdf7f2 TL |
290 | /// |
291 | /// \see listen(socket_address sa) | |
292 | /// \see listen(socket_address sa, listen_options opts) | |
9f95a23c | 293 | future<accept_result> accept(); |
11fdf7f2 TL |
294 | |
295 | /// Stops any \ref accept() in progress. | |
296 | /// | |
297 | /// Current and future \ref accept() calls will terminate immediately | |
298 | /// with an error. | |
299 | void abort_accept(); | |
9f95a23c TL |
300 | |
301 | /// Local bound address | |
302 | socket_address local_address() const; | |
303 | }; | |
304 | ||
305 | } | |
306 | ||
307 | #if SEASTAR_API_LEVEL <= 1 | |
308 | ||
309 | SEASTAR_INCLUDE_API_V1 namespace api_v1 { | |
310 | ||
311 | class server_socket { | |
312 | api_v2::server_socket _impl; | |
313 | private: | |
314 | static api_v2::server_socket make_v2_server_socket(std::unique_ptr<net::api_v1::server_socket_impl>); | |
315 | public: | |
316 | using load_balancing_algorithm = api_v2::server_socket::load_balancing_algorithm; | |
317 | server_socket(); | |
318 | explicit server_socket(std::unique_ptr<net::api_v1::server_socket_impl> ssi); | |
319 | explicit server_socket(std::unique_ptr<net::api_v2::server_socket_impl> ssi); | |
320 | server_socket(server_socket&& ss) noexcept; | |
321 | server_socket(api_v2::server_socket&& ss); | |
322 | ~server_socket(); | |
323 | operator api_v2::server_socket() &&; | |
324 | server_socket& operator=(server_socket&& cs) noexcept; | |
325 | future<connected_socket, socket_address> accept(); | |
326 | void abort_accept(); | |
327 | socket_address local_address() const; | |
11fdf7f2 | 328 | }; |
9f95a23c TL |
329 | |
330 | } | |
331 | ||
332 | #endif | |
333 | ||
11fdf7f2 TL |
334 | /// @} |
335 | ||
336 | struct listen_options { | |
337 | bool reuse_address = false; | |
338 | server_socket::load_balancing_algorithm lba = server_socket::load_balancing_algorithm::default_; | |
339 | transport proto = transport::TCP; | |
9f95a23c TL |
340 | int listen_backlog = 100; |
341 | unsigned fixed_cpu = 0u; | |
342 | void set_fixed_cpu(unsigned cpu) { | |
343 | lba = server_socket::load_balancing_algorithm::fixed; | |
344 | fixed_cpu = cpu; | |
345 | } | |
346 | }; | |
347 | ||
348 | class network_interface { | |
349 | private: | |
350 | shared_ptr<net::network_interface_impl> _impl; | |
351 | public: | |
352 | network_interface(shared_ptr<net::network_interface_impl>); | |
353 | network_interface(network_interface&&); | |
354 | ||
355 | network_interface& operator=(network_interface&&); | |
356 | ||
357 | uint32_t index() const; | |
358 | uint32_t mtu() const; | |
359 | ||
360 | const sstring& name() const; | |
361 | const sstring& display_name() const; | |
362 | const std::vector<net::inet_address>& addresses() const; | |
363 | const std::vector<uint8_t> hardware_address() const; | |
364 | ||
365 | bool is_loopback() const; | |
366 | bool is_virtual() const; | |
367 | bool is_up() const; | |
368 | bool supports_ipv6() const; | |
11fdf7f2 TL |
369 | }; |
370 | ||
371 | class network_stack { | |
372 | public: | |
373 | virtual ~network_stack() {} | |
374 | virtual server_socket listen(socket_address sa, listen_options opts) = 0; | |
375 | // FIXME: local parameter assumes ipv4 for now, fix when adding other AF | |
9f95a23c | 376 | future<connected_socket> connect(socket_address sa, socket_address = {}, transport proto = transport::TCP); |
11fdf7f2 | 377 | virtual ::seastar::socket socket() = 0; |
9f95a23c | 378 | virtual net::udp_channel make_udp_channel(const socket_address& = {}) = 0; |
11fdf7f2 TL |
379 | virtual future<> initialize() { |
380 | return make_ready_future(); | |
381 | } | |
382 | virtual bool has_per_core_namespace() = 0; | |
9f95a23c TL |
383 | // NOTE: this is not a correct query approach. |
384 | // This question should be per NIC, but we have no such | |
385 | // abstraction, so for now this is "stack-wide" | |
386 | virtual bool supports_ipv6() const { | |
387 | return false; | |
388 | } | |
389 | ||
390 | /** | |
391 | * Returns available network interfaces. This represents a | |
392 | * snapshot of interfaces available at call time, hence the | |
393 | * return by value. | |
394 | */ | |
395 | virtual std::vector<network_interface> network_interfaces(); | |
11fdf7f2 TL |
396 | }; |
397 | ||
398 | } |