]> git.proxmox.com Git - ceph.git/blame - ceph/src/seastar/include/seastar/net/api.hh
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / seastar / include / seastar / net / api.hh
CommitLineData
11fdf7f2
TL
1/*
2 * This file is open source software, licensed to you under the terms
3 * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
4 * distributed with this work for additional information regarding copyright
5 * ownership. You may not use this file except in compliance with the License.
6 *
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing,
12 * software distributed under the License is distributed on an
13 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 * KIND, either express or implied. See the License for the
15 * specific language governing permissions and limitations
16 * under the License.
17 */
18/*
19 * Copyright (C) 2014 Cloudius Systems, Ltd.
20 */
21
22#pragma once
23
24#include <memory>
25#include <vector>
26#include <cstring>
27#include <seastar/core/future.hh>
28#include <seastar/net/byteorder.hh>
29#include <seastar/net/socket_defs.hh>
30#include <seastar/net/packet.hh>
1e59de90 31#include <seastar/core/internal/api-level.hh>
11fdf7f2
TL
32#include <seastar/core/temporary_buffer.hh>
33#include <seastar/core/iostream.hh>
34#include <seastar/util/std-compat.hh>
20effc67 35#include <seastar/util/program-options.hh>
11fdf7f2
TL
36#include <sys/types.h>
37
38namespace seastar {
39
9f95a23c 40inline
f67539c2 41bool is_ip_unspecified(const ipv4_addr& addr) noexcept {
9f95a23c 42 return addr.is_ip_unspecified();
11fdf7f2
TL
43}
44
9f95a23c 45inline
f67539c2 46bool is_port_unspecified(const ipv4_addr& addr) noexcept {
9f95a23c 47 return addr.is_port_unspecified();
11fdf7f2
TL
48}
49
9f95a23c 50inline
f67539c2 51socket_address make_ipv4_address(const ipv4_addr& addr) noexcept {
9f95a23c 52 return socket_address(addr);
11fdf7f2
TL
53}
54
55inline
f67539c2 56socket_address make_ipv4_address(uint32_t ip, uint16_t port) noexcept {
9f95a23c 57 return make_ipv4_address(ipv4_addr(ip, port));
11fdf7f2
TL
58}
59
60namespace net {
61
62// see linux tcp(7) for parameter explanation
63struct tcp_keepalive_params {
64 std::chrono::seconds idle; // TCP_KEEPIDLE
65 std::chrono::seconds interval; // TCP_KEEPINTVL
66 unsigned count; // TCP_KEEPCNT
67};
68
69// see linux sctp(7) for parameter explanation
70struct sctp_keepalive_params {
71 std::chrono::seconds interval; // spp_hbinterval
72 unsigned count; // spp_pathmaxrt
73};
74
f67539c2 75using keepalive_params = std::variant<tcp_keepalive_params, sctp_keepalive_params>;
11fdf7f2
TL
76
77/// \cond internal
78class connected_socket_impl;
79class socket_impl;
9f95a23c 80
f67539c2 81class server_socket_impl;
11fdf7f2
TL
82class udp_channel_impl;
83class get_impl;
84/// \endcond
85
86class udp_datagram_impl {
87public:
88 virtual ~udp_datagram_impl() {};
9f95a23c
TL
89 virtual socket_address get_src() = 0;
90 virtual socket_address get_dst() = 0;
11fdf7f2
TL
91 virtual uint16_t get_dst_port() = 0;
92 virtual packet& get_data() = 0;
93};
94
95class udp_datagram final {
96private:
97 std::unique_ptr<udp_datagram_impl> _impl;
98public:
f67539c2 99 udp_datagram(std::unique_ptr<udp_datagram_impl>&& impl) noexcept : _impl(std::move(impl)) {};
9f95a23c
TL
100 socket_address get_src() { return _impl->get_src(); }
101 socket_address get_dst() { return _impl->get_dst(); }
11fdf7f2
TL
102 uint16_t get_dst_port() { return _impl->get_dst_port(); }
103 packet& get_data() { return _impl->get_data(); }
104};
105
106class udp_channel {
107private:
108 std::unique_ptr<udp_channel_impl> _impl;
109public:
f67539c2
TL
110 udp_channel() noexcept;
111 udp_channel(std::unique_ptr<udp_channel_impl>) noexcept;
11fdf7f2
TL
112 ~udp_channel();
113
f67539c2
TL
114 udp_channel(udp_channel&&) noexcept;
115 udp_channel& operator=(udp_channel&&) noexcept;
11fdf7f2 116
9f95a23c
TL
117 socket_address local_address() const;
118
11fdf7f2 119 future<udp_datagram> receive();
9f95a23c
TL
120 future<> send(const socket_address& dst, const char* msg);
121 future<> send(const socket_address& dst, packet p);
11fdf7f2
TL
122 bool is_closed() const;
123 /// Causes a pending receive() to complete (possibly with an exception)
124 void shutdown_input();
125 /// Causes a pending send() to complete (possibly with an exception)
126 void shutdown_output();
127 /// Closes the channel and releases all resources.
128 ///
129 /// Must be called only when there are no unfinished send() or receive() calls. You
130 /// can force pending calls to complete soon by calling shutdown_input() and
131 /// shutdown_output().
132 void close();
133};
134
9f95a23c
TL
135class network_interface_impl;
136
11fdf7f2
TL
137} /* namespace net */
138
139/// \addtogroup networking-module
140/// @{
141
f67539c2
TL
142/// Configuration for buffered connected_socket input operations
143///
144/// This structure allows tuning of buffered input operations done via
145/// connected_socket. It is a hint to the implementation and may be
146/// ignored (e.g. the zero-copy native stack does not allocate buffers,
147/// so it ignores buffer-size parameters).
148struct connected_socket_input_stream_config final {
149 /// Initial buffer size to use for input buffering
150 unsigned buffer_size = 8192;
151 /// Minimum buffer size to use for input buffering. The system will decrease
152 /// buffer sizes if it sees a tendency towards small requests, but will not go
153 /// below this buffer size.
154 unsigned min_buffer_size = 512;
155 /// Maximum buffer size to use for input buffering. The system will increase
156 /// buffer sizes if it sees a tendency towards large requests, but will not go
157 /// above this buffer size.
158 unsigned max_buffer_size = 128 * 1024;
159};
160
1e59de90
TL
161/// Distinguished name
162struct session_dn {
163 sstring subject;
164 sstring issuer;
165};
166
11fdf7f2
TL
167/// A TCP (or other stream-based protocol) connection.
168///
169/// A \c connected_socket represents a full-duplex stream between
170/// two endpoints, a local endpoint and a remote endpoint.
171class connected_socket {
172 friend class net::get_impl;
173 std::unique_ptr<net::connected_socket_impl> _csi;
174public:
175 /// Constructs a \c connected_socket not corresponding to a connection
f67539c2 176 connected_socket() noexcept;
11fdf7f2
TL
177 ~connected_socket();
178
179 /// \cond internal
f67539c2 180 explicit connected_socket(std::unique_ptr<net::connected_socket_impl> csi) noexcept;
11fdf7f2
TL
181 /// \endcond
182 /// Moves a \c connected_socket object.
183 connected_socket(connected_socket&& cs) noexcept;
184 /// Move-assigns a \c connected_socket object.
185 connected_socket& operator=(connected_socket&& cs) noexcept;
186 /// Gets the input stream.
187 ///
f67539c2
TL
188 /// \param csisc Configuration for the input_stream returned
189 ///
11fdf7f2 190 /// Gets an object returning data sent from the remote endpoint.
f67539c2 191 input_stream<char> input(connected_socket_input_stream_config csisc = {});
11fdf7f2
TL
192 /// Gets the output stream.
193 ///
194 /// Gets an object that sends data to the remote endpoint.
195 /// \param buffer_size how much data to buffer
196 output_stream<char> output(size_t buffer_size = 8192);
197 /// Sets the TCP_NODELAY option (disabling Nagle's algorithm)
198 void set_nodelay(bool nodelay);
199 /// Gets the TCP_NODELAY option (Nagle's algorithm)
200 ///
201 /// \return whether the nodelay option is enabled or not
202 bool get_nodelay() const;
203 /// Sets SO_KEEPALIVE option (enable keepalive timer on a socket)
204 void set_keepalive(bool keepalive);
205 /// Gets SO_KEEPALIVE option
206 /// \return whether the keepalive option is enabled or not
207 bool get_keepalive() const;
208 /// Sets keepalive parameters (TCP or SCTP, per net::keepalive_params)
209 void set_keepalive_parameters(const net::keepalive_params& p);
210 /// Gets keepalive parameters (TCP or SCTP, per net::keepalive_params)
211 net::keepalive_params get_keepalive_parameters() const;
f67539c2
TL
212 /// Sets custom socket options. Based on setsockopt function.
213 /// Linux users should refer to protocol-specific manuals
214 /// to see available options, e.g. tcp(7), ip(7), etc.
215 void set_sockopt(int level, int optname, const void* data, size_t len);
216 /// Gets custom socket options. Based on getsockopt function.
217 /// Linux users should refer to protocol-specific manuals
218 /// to see available options, e.g. tcp(7), ip(7), etc.
219 int get_sockopt(int level, int optname, void* data, size_t len) const;
20effc67
TL
220 /// Local address of the socket
221 socket_address local_address() const noexcept;
11fdf7f2
TL
222
223 /// Disables output to the socket.
224 ///
225 /// Current or future writes that have not been successfully flushed
226 /// will immediately fail with an error. This is useful to abort
227 /// operations on a socket that is not making progress due to a
228 /// peer failure.
229 void shutdown_output();
230 /// Disables input from the socket.
231 ///
232 /// Current or future reads will immediately fail with an error.
233 /// This is useful to abort operations on a socket that is not making
234 /// progress due to a peer failure.
235 void shutdown_input();
1e59de90
TL
236 /// Check whether the \c connected_socket is initialized.
237 ///
238 /// \return true if this \c connected_socket is initialized,
239 /// false otherwise.
240 ///
241 /// \see connect(socket_address sa)
242 /// \see connect(socket_address sa, socket_address local, transport proto)
243 explicit operator bool() const noexcept {
244 return static_cast<bool>(_csi);
245 }
246 /// Waits for the peer of this socket to disconnect
247 ///
248 /// \return future that resolves when the peer closes the connection or shuts it down
249 /// for writing, or when \ref shutdown_input() is called on the local socket.
250 ///
251 /// Note that when the returned future resolves, for whatever reason, the socket
252 /// may still be readable, so the caller may want to wait for both events
253 /// -- this one and EOF from read.
254 ///
255 /// Calling it several times per socket is not allowed (undefined behavior)
256 ///
257 /// \see poll(2) about POLLRDHUP for more details
258 future<> wait_input_shutdown();
11fdf7f2
TL
259};
260/// @}
261
262/// \addtogroup networking-module
263/// @{
264
265/// The seastar socket.
266///
267/// A \c socket that allows a connection to be established between
268/// two endpoints.
269class socket {
270 std::unique_ptr<net::socket_impl> _si;
271public:
f67539c2 272 socket() noexcept = default;
11fdf7f2
TL
273 ~socket();
274
275 /// \cond internal
f67539c2 276 explicit socket(std::unique_ptr<net::socket_impl> si) noexcept;
11fdf7f2
TL
277 /// \endcond
278 /// Moves a \c seastar::socket object.
279 socket(socket&&) noexcept;
280 /// Move-assigns a \c seastar::socket object.
281 socket& operator=(socket&&) noexcept;
282
283 /// Attempts to establish the connection.
284 ///
285 /// \return a \ref connected_socket representing the connection.
9f95a23c
TL
286 future<connected_socket> connect(socket_address sa, socket_address local = {}, transport proto = transport::TCP);
287
288 /// Sets SO_REUSEADDR option (enable reuseaddr option on a socket)
289 void set_reuseaddr(bool reuseaddr);
290 /// Gets SO_REUSEADDR option
291 /// \return whether the reuseaddr option is enabled or not
292 bool get_reuseaddr() const;
11fdf7f2
TL
293 /// Stops any in-flight connection attempt.
294 ///
295 /// Cancels the connection attempt if it's still in progress, and
296 /// terminates the connection if it has already been established.
297 void shutdown();
298};
299
300/// @}
301
302/// \addtogroup networking-module
303/// @{
304
9f95a23c
TL
305/// The result of a server_socket::accept() call
306struct accept_result {
307 connected_socket connection; ///< The newly-accepted connection
308 socket_address remote_address; ///< The address of the peer that connected to us
309};
310
11fdf7f2
TL
311/// A listening socket, waiting to accept incoming network connections.
312class server_socket {
f67539c2 313 std::unique_ptr<net::server_socket_impl> _ssi;
11fdf7f2
TL
314 bool _aborted = false;
315public:
316 enum class load_balancing_algorithm {
317 // This algorithm tries to distribute all connections equally between all shards.
318 // It does this by sending new connections to the shard with the smallest number of connections.
319 connection_distribution,
320 // This algorithm distributes new connections based on the peer's TCP port. The destination shard
321 // is calculated as the port number modulo the number of shards. This allows a client to connect
322 // to a specific shard in a server, given it knows how many shards the server has, by choosing
323 // its source port number accordingly.
324 port,
9f95a23c
TL
325 // This algorithm distributes all new connections to listen_options::fixed_cpu shard only.
326 fixed,
11fdf7f2
TL
327 default_ = connection_distribution
328 };
1e59de90 329 /// Constructs a \c server_socket without being bound to any address
f67539c2 330 server_socket() noexcept;
11fdf7f2 331 /// \cond internal
f67539c2 332 explicit server_socket(std::unique_ptr<net::server_socket_impl> ssi) noexcept;
11fdf7f2
TL
333 /// \endcond
334 /// Moves a \c server_socket object.
335 server_socket(server_socket&& ss) noexcept;
336 ~server_socket();
337 /// Move-assigns a \c server_socket object.
338 server_socket& operator=(server_socket&& cs) noexcept;
339
340 /// Accepts the next connection to successfully connect to this socket.
341 ///
9f95a23c
TL
342 /// \return an accept_result representing the connection and
343 /// the socket_address of the remote endpoint.
11fdf7f2
TL
344 ///
345 /// \see listen(socket_address sa)
346 /// \see listen(socket_address sa, listen_options opts)
9f95a23c 347 future<accept_result> accept();
11fdf7f2
TL
348
349 /// Stops any \ref accept() in progress.
350 ///
351 /// Current and future \ref accept() calls will terminate immediately
352 /// with an error.
353 void abort_accept();
9f95a23c
TL
354
355 /// Local bound address
1e59de90
TL
356 ///
357 /// \return the local bound address if the \c server_socket is listening,
358 /// an empty address constructed with \c socket_address() otherwise.
359 ///
360 /// \see listen(socket_address sa)
361 /// \see listen(socket_address sa, listen_options opts)
f67539c2 362 socket_address local_address() const noexcept;
1e59de90
TL
363
364 /// Check whether the \c server_socket is listening on any address.
365 ///
366 /// \return true if this \c server_socket is bound to an address,
367 /// false if it is just created with the default constructor.
368 ///
369 /// \see listen(socket_address sa)
370 /// \see listen(socket_address sa, listen_options opts)
371 explicit operator bool() const noexcept {
372 return static_cast<bool>(_ssi);
373 }
11fdf7f2 374};
9f95a23c 375
11fdf7f2
TL
376/// @}
377
378struct listen_options {
379 bool reuse_address = false;
380 server_socket::load_balancing_algorithm lba = server_socket::load_balancing_algorithm::default_;
381 transport proto = transport::TCP;
9f95a23c
TL
382 int listen_backlog = 100;
383 unsigned fixed_cpu = 0u;
384 void set_fixed_cpu(unsigned cpu) {
385 lba = server_socket::load_balancing_algorithm::fixed;
386 fixed_cpu = cpu;
387 }
388};
389
390class network_interface {
391private:
392 shared_ptr<net::network_interface_impl> _impl;
393public:
f67539c2
TL
394 network_interface() = delete;
395 network_interface(shared_ptr<net::network_interface_impl>) noexcept;
396 network_interface(network_interface&&) noexcept;
9f95a23c 397
f67539c2 398 network_interface& operator=(network_interface&&) noexcept;
9f95a23c
TL
399
400 uint32_t index() const;
401 uint32_t mtu() const;
402
403 const sstring& name() const;
404 const sstring& display_name() const;
405 const std::vector<net::inet_address>& addresses() const;
406 const std::vector<uint8_t> hardware_address() const;
407
408 bool is_loopback() const;
409 bool is_virtual() const;
410 bool is_up() const;
411 bool supports_ipv6() const;
11fdf7f2
TL
412};
413
414class network_stack {
415public:
416 virtual ~network_stack() {}
417 virtual server_socket listen(socket_address sa, listen_options opts) = 0;
418 // FIXME: local parameter assumes ipv4 for now, fix when adding other AF
9f95a23c 419 future<connected_socket> connect(socket_address sa, socket_address = {}, transport proto = transport::TCP);
11fdf7f2 420 virtual ::seastar::socket socket() = 0;
9f95a23c 421 virtual net::udp_channel make_udp_channel(const socket_address& = {}) = 0;
11fdf7f2
TL
422 virtual future<> initialize() {
423 return make_ready_future();
424 }
425 virtual bool has_per_core_namespace() = 0;
9f95a23c
TL
426 // NOTE: this is not a correct query approach.
427 // This question should be per NIC, but we have no such
428 // abstraction, so for now this is "stack-wide"
429 virtual bool supports_ipv6() const {
430 return false;
431 }
432
433 /**
434 * Returns available network interfaces. This represents a
435 * snapshot of interfaces available at call time, hence the
436 * return by value.
437 */
438 virtual std::vector<network_interface> network_interfaces();
11fdf7f2
TL
439};
440
20effc67
TL
441struct network_stack_entry {
442 using factory_func = noncopyable_function<future<std::unique_ptr<network_stack>> (const program_options::option_group&)>;
443
444 sstring name;
445 std::unique_ptr<program_options::option_group> opts;
446 factory_func factory;
447 bool is_default;
448};
449
11fdf7f2 450}