]> git.proxmox.com Git - ceph.git/blame - ceph/src/seastar/include/seastar/net/api.hh
import quincy beta 17.1.0
[ceph.git] / ceph / src / seastar / include / seastar / net / api.hh
CommitLineData
11fdf7f2
TL
1/*
2 * This file is open source software, licensed to you under the terms
3 * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
4 * distributed with this work for additional information regarding copyright
5 * ownership. You may not use this file except in compliance with the License.
6 *
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing,
12 * software distributed under the License is distributed on an
13 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 * KIND, either express or implied. See the License for the
15 * specific language governing permissions and limitations
16 * under the License.
17 */
18/*
19 * Copyright (C) 2014 Cloudius Systems, Ltd.
20 */
21
22#pragma once
23
24#include <memory>
25#include <vector>
26#include <cstring>
27#include <seastar/core/future.hh>
28#include <seastar/net/byteorder.hh>
29#include <seastar/net/socket_defs.hh>
30#include <seastar/net/packet.hh>
11fdf7f2
TL
31#include <seastar/core/temporary_buffer.hh>
32#include <seastar/core/iostream.hh>
33#include <seastar/util/std-compat.hh>
20effc67 34#include <seastar/util/program-options.hh>
9f95a23c 35#include "../core/internal/api-level.hh"
11fdf7f2
TL
36#include <sys/types.h>
37
38namespace seastar {
39
9f95a23c 40inline
f67539c2 41bool is_ip_unspecified(const ipv4_addr& addr) noexcept {
9f95a23c 42 return addr.is_ip_unspecified();
11fdf7f2
TL
43}
44
9f95a23c 45inline
f67539c2 46bool is_port_unspecified(const ipv4_addr& addr) noexcept {
9f95a23c 47 return addr.is_port_unspecified();
11fdf7f2
TL
48}
49
9f95a23c 50inline
f67539c2 51socket_address make_ipv4_address(const ipv4_addr& addr) noexcept {
9f95a23c 52 return socket_address(addr);
11fdf7f2
TL
53}
54
55inline
f67539c2 56socket_address make_ipv4_address(uint32_t ip, uint16_t port) noexcept {
9f95a23c 57 return make_ipv4_address(ipv4_addr(ip, port));
11fdf7f2
TL
58}
59
60namespace net {
61
// TCP keepalive tuning knobs; see linux tcp(7) for parameter explanation.
//
// Members carry default initializers so that a default-constructed object
// never holds indeterminate values. The type remains an aggregate, so
// brace-initialization with three values keeps working.
struct tcp_keepalive_params {
    std::chrono::seconds idle{};     // TCP_KEEPIDLE
    std::chrono::seconds interval{}; // TCP_KEEPINTVL
    unsigned count = 0;              // TCP_KEEPCNT
};
68
// SCTP keepalive tuning knobs; see linux sctp(7) for parameter explanation.
//
// Members carry default initializers so that a default-constructed object
// never holds indeterminate values. The type remains an aggregate, so
// brace-initialization with two values keeps working.
struct sctp_keepalive_params {
    std::chrono::seconds interval{}; // spp_hbinterval
    unsigned count = 0;              // spp_pathmaxrt
};
74
f67539c2 75using keepalive_params = std::variant<tcp_keepalive_params, sctp_keepalive_params>;
11fdf7f2
TL
76
/// \cond internal
// Forward declarations of implementation types that the public handle
// classes in this header refer to; they are not defined in this header.
class connected_socket_impl;
class socket_impl;

class server_socket_impl;
class udp_channel_impl;
class get_impl;
/// \endcond
85
86class udp_datagram_impl {
87public:
88 virtual ~udp_datagram_impl() {};
9f95a23c
TL
89 virtual socket_address get_src() = 0;
90 virtual socket_address get_dst() = 0;
11fdf7f2
TL
91 virtual uint16_t get_dst_port() = 0;
92 virtual packet& get_data() = 0;
93};
94
95class udp_datagram final {
96private:
97 std::unique_ptr<udp_datagram_impl> _impl;
98public:
f67539c2 99 udp_datagram(std::unique_ptr<udp_datagram_impl>&& impl) noexcept : _impl(std::move(impl)) {};
9f95a23c
TL
100 socket_address get_src() { return _impl->get_src(); }
101 socket_address get_dst() { return _impl->get_dst(); }
11fdf7f2
TL
102 uint16_t get_dst_port() { return _impl->get_dst_port(); }
103 packet& get_data() { return _impl->get_data(); }
104};
105
106class udp_channel {
107private:
108 std::unique_ptr<udp_channel_impl> _impl;
109public:
f67539c2
TL
110 udp_channel() noexcept;
111 udp_channel(std::unique_ptr<udp_channel_impl>) noexcept;
11fdf7f2
TL
112 ~udp_channel();
113
f67539c2
TL
114 udp_channel(udp_channel&&) noexcept;
115 udp_channel& operator=(udp_channel&&) noexcept;
11fdf7f2 116
9f95a23c
TL
117 socket_address local_address() const;
118
11fdf7f2 119 future<udp_datagram> receive();
9f95a23c
TL
120 future<> send(const socket_address& dst, const char* msg);
121 future<> send(const socket_address& dst, packet p);
11fdf7f2
TL
122 bool is_closed() const;
123 /// Causes a pending receive() to complete (possibly with an exception)
124 void shutdown_input();
125 /// Causes a pending send() to complete (possibly with an exception)
126 void shutdown_output();
127 /// Close the channel and releases all resources.
128 ///
129 /// Must be called only when there are no unfinished send() or receive() calls. You
130 /// can force pending calls to complete soon by calling shutdown_input() and
131 /// shutdown_output().
132 void close();
133};
134
9f95a23c
TL
// Forward declaration; the implementation type is not defined in this header.
class network_interface_impl;
136
11fdf7f2
TL
137} /* namespace net */
138
139/// \addtogroup networking-module
140/// @{
141
f67539c2
TL
/// Configuration for buffered connected_socket input operations
///
/// This structure allows tuning of buffered input operations done via
/// connected_socket. It is a hint to the implementation and may be
/// ignored (e.g. the zero-copy native stack does not allocate buffers,
/// so it ignores buffer-size parameters).
struct connected_socket_input_stream_config final {
    /// Initial buffer size to use for input buffering
    unsigned buffer_size = 8192;
    /// Minimum buffer size to use for input buffering. The system will decrease
    /// buffer sizes if it sees a tendency towards small requests, but will not go
    /// below this buffer size.
    unsigned min_buffer_size = 512;
    /// Maximum buffer size to use for input buffering. The system will increase
    /// buffer sizes if it sees a tendency towards large requests, but will not go
    /// above this buffer size.
    unsigned max_buffer_size = 128 * 1024;
};
160
11fdf7f2
TL
161/// A TCP (or other stream-based protocol) connection.
162///
163/// A \c connected_socket represents a full-duplex stream between
164/// two endpoints, a local endpoint and a remote endpoint.
165class connected_socket {
166 friend class net::get_impl;
167 std::unique_ptr<net::connected_socket_impl> _csi;
168public:
169 /// Constructs a \c connected_socket not corresponding to a connection
f67539c2 170 connected_socket() noexcept;
11fdf7f2
TL
171 ~connected_socket();
172
173 /// \cond internal
f67539c2 174 explicit connected_socket(std::unique_ptr<net::connected_socket_impl> csi) noexcept;
11fdf7f2
TL
175 /// \endcond
176 /// Moves a \c connected_socket object.
177 connected_socket(connected_socket&& cs) noexcept;
178 /// Move-assigns a \c connected_socket object.
179 connected_socket& operator=(connected_socket&& cs) noexcept;
180 /// Gets the input stream.
181 ///
f67539c2
TL
182 /// \param csisc Configuration for the input_stream returned
183 ///
11fdf7f2 184 /// Gets an object returning data sent from the remote endpoint.
f67539c2 185 input_stream<char> input(connected_socket_input_stream_config csisc = {});
11fdf7f2
TL
186 /// Gets the output stream.
187 ///
188 /// Gets an object that sends data to the remote endpoint.
189 /// \param buffer_size how much data to buffer
190 output_stream<char> output(size_t buffer_size = 8192);
191 /// Sets the TCP_NODELAY option (disabling Nagle's algorithm)
192 void set_nodelay(bool nodelay);
193 /// Gets the TCP_NODELAY option (Nagle's algorithm)
194 ///
195 /// \return whether the nodelay option is enabled or not
196 bool get_nodelay() const;
197 /// Sets SO_KEEPALIVE option (enable keepalive timer on a socket)
198 void set_keepalive(bool keepalive);
199 /// Gets O_KEEPALIVE option
200 /// \return whether the keepalive option is enabled or not
201 bool get_keepalive() const;
202 /// Sets TCP keepalive parameters
203 void set_keepalive_parameters(const net::keepalive_params& p);
204 /// Get TCP keepalive parameters
205 net::keepalive_params get_keepalive_parameters() const;
f67539c2
TL
206 /// Sets custom socket options. Based on setsockopt function.
207 /// Linux users should refer to protocol-specific manuals
208 /// to see available options, e.g. tcp(7), ip(7), etc.
209 void set_sockopt(int level, int optname, const void* data, size_t len);
210 /// Gets custom socket options. Based on getsockopt function.
211 /// Linux users should refer to protocol-specific manuals
212 /// to see available options, e.g. tcp(7), ip(7), etc.
213 int get_sockopt(int level, int optname, void* data, size_t len) const;
20effc67
TL
214 /// Local address of the socket
215 socket_address local_address() const noexcept;
11fdf7f2
TL
216
217 /// Disables output to the socket.
218 ///
219 /// Current or future writes that have not been successfully flushed
220 /// will immediately fail with an error. This is useful to abort
221 /// operations on a socket that is not making progress due to a
222 /// peer failure.
223 void shutdown_output();
224 /// Disables input from the socket.
225 ///
226 /// Current or future reads will immediately fail with an error.
227 /// This is useful to abort operations on a socket that is not making
228 /// progress due to a peer failure.
229 void shutdown_input();
230};
231/// @}
232
233/// \addtogroup networking-module
234/// @{
235
236/// The seastar socket.
237///
238/// A \c socket that allows a connection to be established between
239/// two endpoints.
240class socket {
241 std::unique_ptr<net::socket_impl> _si;
242public:
f67539c2 243 socket() noexcept = default;
11fdf7f2
TL
244 ~socket();
245
246 /// \cond internal
f67539c2 247 explicit socket(std::unique_ptr<net::socket_impl> si) noexcept;
11fdf7f2
TL
248 /// \endcond
249 /// Moves a \c seastar::socket object.
250 socket(socket&&) noexcept;
251 /// Move-assigns a \c seastar::socket object.
252 socket& operator=(socket&&) noexcept;
253
254 /// Attempts to establish the connection.
255 ///
256 /// \return a \ref connected_socket representing the connection.
9f95a23c
TL
257 future<connected_socket> connect(socket_address sa, socket_address local = {}, transport proto = transport::TCP);
258
259 /// Sets SO_REUSEADDR option (enable reuseaddr option on a socket)
260 void set_reuseaddr(bool reuseaddr);
261 /// Gets O_REUSEADDR option
262 /// \return whether the reuseaddr option is enabled or not
263 bool get_reuseaddr() const;
11fdf7f2
TL
264 /// Stops any in-flight connection attempt.
265 ///
266 /// Cancels the connection attempt if it's still in progress, and
267 /// terminates the connection if it has already been established.
268 void shutdown();
269};
270
271/// @}
272
273/// \addtogroup networking-module
274/// @{
275
9f95a23c
TL
276/// The result of an server_socket::accept() call
277struct accept_result {
278 connected_socket connection; ///< The newly-accepted connection
279 socket_address remote_address; ///< The address of the peer that connected to us
280};
281
11fdf7f2
TL
282/// A listening socket, waiting to accept incoming network connections.
283class server_socket {
f67539c2 284 std::unique_ptr<net::server_socket_impl> _ssi;
11fdf7f2
TL
285 bool _aborted = false;
286public:
287 enum class load_balancing_algorithm {
288 // This algorithm tries to distribute all connections equally between all shards.
289 // It does this by sending new connections to a shard with smallest amount of connections.
290 connection_distribution,
291 // This algorithm distributes new connection based on peer's tcp port. Destination shard
292 // is calculated as a port number modulo number of shards. This allows a client to connect
293 // to a specific shard in a server given it knows how many shards server has by choosing
294 // src port number accordingly.
295 port,
9f95a23c
TL
296 // This algorithm distributes all new connections to listen_options::fixed_cpu shard only.
297 fixed,
11fdf7f2
TL
298 default_ = connection_distribution
299 };
300 /// Constructs a \c server_socket not corresponding to a connection
f67539c2 301 server_socket() noexcept;
11fdf7f2 302 /// \cond internal
f67539c2 303 explicit server_socket(std::unique_ptr<net::server_socket_impl> ssi) noexcept;
11fdf7f2
TL
304 /// \endcond
305 /// Moves a \c server_socket object.
306 server_socket(server_socket&& ss) noexcept;
307 ~server_socket();
308 /// Move-assigns a \c server_socket object.
309 server_socket& operator=(server_socket&& cs) noexcept;
310
311 /// Accepts the next connection to successfully connect to this socket.
312 ///
9f95a23c
TL
313 /// \return an accept_result representing the connection and
314 /// the socket_address of the remote endpoint.
11fdf7f2
TL
315 ///
316 /// \see listen(socket_address sa)
317 /// \see listen(socket_address sa, listen_options opts)
9f95a23c 318 future<accept_result> accept();
11fdf7f2
TL
319
320 /// Stops any \ref accept() in progress.
321 ///
322 /// Current and future \ref accept() calls will terminate immediately
323 /// with an error.
324 void abort_accept();
9f95a23c
TL
325
326 /// Local bound address
f67539c2 327 socket_address local_address() const noexcept;
11fdf7f2 328};
9f95a23c 329
11fdf7f2
TL
330/// @}
331
332struct listen_options {
333 bool reuse_address = false;
334 server_socket::load_balancing_algorithm lba = server_socket::load_balancing_algorithm::default_;
335 transport proto = transport::TCP;
9f95a23c
TL
336 int listen_backlog = 100;
337 unsigned fixed_cpu = 0u;
338 void set_fixed_cpu(unsigned cpu) {
339 lba = server_socket::load_balancing_algorithm::fixed;
340 fixed_cpu = cpu;
341 }
342};
343
344class network_interface {
345private:
346 shared_ptr<net::network_interface_impl> _impl;
347public:
f67539c2
TL
348 network_interface() = delete;
349 network_interface(shared_ptr<net::network_interface_impl>) noexcept;
350 network_interface(network_interface&&) noexcept;
9f95a23c 351
f67539c2 352 network_interface& operator=(network_interface&&) noexcept;
9f95a23c
TL
353
354 uint32_t index() const;
355 uint32_t mtu() const;
356
357 const sstring& name() const;
358 const sstring& display_name() const;
359 const std::vector<net::inet_address>& addresses() const;
360 const std::vector<uint8_t> hardware_address() const;
361
362 bool is_loopback() const;
363 bool is_virtual() const;
364 bool is_up() const;
365 bool supports_ipv6() const;
11fdf7f2
TL
366};
367
368class network_stack {
369public:
370 virtual ~network_stack() {}
371 virtual server_socket listen(socket_address sa, listen_options opts) = 0;
372 // FIXME: local parameter assumes ipv4 for now, fix when adding other AF
9f95a23c 373 future<connected_socket> connect(socket_address sa, socket_address = {}, transport proto = transport::TCP);
11fdf7f2 374 virtual ::seastar::socket socket() = 0;
9f95a23c 375 virtual net::udp_channel make_udp_channel(const socket_address& = {}) = 0;
11fdf7f2
TL
376 virtual future<> initialize() {
377 return make_ready_future();
378 }
379 virtual bool has_per_core_namespace() = 0;
9f95a23c
TL
380 // NOTE: this is not a correct query approach.
381 // This question should be per NIC, but we have no such
382 // abstraction, so for now this is "stack-wide"
383 virtual bool supports_ipv6() const {
384 return false;
385 }
386
387 /**
388 * Returns available network interfaces. This represents a
389 * snapshot of interfaces available at call time, hence the
390 * return by value.
391 */
392 virtual std::vector<network_interface> network_interfaces();
11fdf7f2
TL
393};
394
20effc67
TL
395struct network_stack_entry {
396 using factory_func = noncopyable_function<future<std::unique_ptr<network_stack>> (const program_options::option_group&)>;
397
398 sstring name;
399 std::unique_ptr<program_options::option_group> opts;
400 factory_func factory;
401 bool is_default;
402};
403
11fdf7f2 404}