]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - net/mptcp/protocol.h
Merge tag 'drm-next-2020-04-08' of git://anongit.freedesktop.org/drm/drm
[mirror_ubuntu-jammy-kernel.git] / net / mptcp / protocol.h
CommitLineData
f870fa0b
MM
1/* SPDX-License-Identifier: GPL-2.0 */
2/* Multipath TCP
3 *
4 * Copyright (c) 2017 - 2019, Intel Corporation.
5 */
6
7#ifndef __MPTCP_PROTOCOL_H
8#define __MPTCP_PROTOCOL_H
9
79c0949e
PK
10#include <linux/random.h>
11#include <net/tcp.h>
12#include <net/inet_connection_sock.h>
13
/* MPTCP protocol version number supported by this implementation. */
#define MPTCP_SUPPORTED_VERSION	1
eda7acdd
PK
15
/* MPTCP option bits
 *
 * One distinct bit per option kind, so several can be OR-ed into a single
 * bitmask. Bits 0-2 cover MP_CAPABLE, bits 3-5 cover MP_JOIN and bits 6-8
 * cover address signalling (ADD_ADDR, ADD_ADDR6, RM_ADDR).
 */
#define OPTION_MPTCP_MPC_SYN	BIT(0)
#define OPTION_MPTCP_MPC_SYNACK	BIT(1)
#define OPTION_MPTCP_MPC_ACK	BIT(2)
#define OPTION_MPTCP_MPJ_SYN	BIT(3)
#define OPTION_MPTCP_MPJ_SYNACK	BIT(4)
#define OPTION_MPTCP_MPJ_ACK	BIT(5)
#define OPTION_MPTCP_ADD_ADDR	BIT(6)
#define OPTION_MPTCP_ADD_ADDR6	BIT(7)
#define OPTION_MPTCP_RM_ADDR	BIT(8)
eda7acdd
PK
26
/* MPTCP option subtypes
 *
 * Consecutive values 0..7; each fits in the 4-bit subtype slot written by
 * mptcp_option().
 */
#define MPTCPOPT_MP_CAPABLE	0
#define MPTCPOPT_MP_JOIN	1
#define MPTCPOPT_DSS		2
#define MPTCPOPT_ADD_ADDR	3
#define MPTCPOPT_RM_ADDR	4
#define MPTCPOPT_MP_PRIO	5
#define MPTCPOPT_MP_FAIL	6
#define MPTCPOPT_MP_FASTCLOSE	7
36
/* MPTCP suboption lengths
 *
 * Values are byte counts. Each *_PORT variant is the matching non-port
 * length plus TCPOLEN_MPTCP_PORT_LEN.
 */
#define TCPOLEN_MPTCP_MPC_SYN		4
#define TCPOLEN_MPTCP_MPC_SYNACK	12
#define TCPOLEN_MPTCP_MPC_ACK		20
#define TCPOLEN_MPTCP_MPC_ACK_DATA	22
#define TCPOLEN_MPTCP_MPJ_SYN		12
#define TCPOLEN_MPTCP_MPJ_SYNACK	16
#define TCPOLEN_MPTCP_MPJ_ACK		24
#define TCPOLEN_MPTCP_DSS_BASE		4
#define TCPOLEN_MPTCP_DSS_ACK32		4
#define TCPOLEN_MPTCP_DSS_ACK64		8
#define TCPOLEN_MPTCP_DSS_MAP32		10
#define TCPOLEN_MPTCP_DSS_MAP64		14
#define TCPOLEN_MPTCP_DSS_CHECKSUM	2
#define TCPOLEN_MPTCP_ADD_ADDR		16
#define TCPOLEN_MPTCP_ADD_ADDR_PORT	18
#define TCPOLEN_MPTCP_ADD_ADDR_BASE	8
#define TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT	10
#define TCPOLEN_MPTCP_ADD_ADDR6		28
#define TCPOLEN_MPTCP_ADD_ADDR6_PORT	30
#define TCPOLEN_MPTCP_ADD_ADDR6_BASE	20
#define TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT	22
#define TCPOLEN_MPTCP_PORT_LEN		2
#define TCPOLEN_MPTCP_RM_ADDR_BASE	4
eda7acdd 61
/* MPTCP MP_JOIN flags
 *
 * MPTCPOPT_BACKUP is a flag bit; the two *_LEN values are byte counts
 * (MPTCPOPT_HMAC_LEN sizes the hmac[] array in the subflow context).
 */
#define MPTCPOPT_BACKUP		BIT(0)
#define MPTCPOPT_HMAC_LEN	20
#define MPTCPOPT_THMAC_LEN	8
f296234c 66
eda7acdd
PK
/* MPTCP MP_CAPABLE flags
 *
 * NOTE(review): MPTCP_CAP_FLAG_MASK (0x3F) does not include bits 6 and 7,
 * i.e. MPTCP_CAP_EXTENSIBILITY and MPTCP_CAP_CHECKSUM_REQD are outside the
 * mask - confirm that this is intended by the option parser.
 */
#define MPTCP_VERSION_MASK	(0x0F)
#define MPTCP_CAP_CHECKSUM_REQD	BIT(7)
#define MPTCP_CAP_EXTENSIBILITY	BIT(6)
#define MPTCP_CAP_HMAC_SHA256	BIT(0)
#define MPTCP_CAP_FLAG_MASK	(0x3F)
73
6d0060f6
MM
/* MPTCP DSS flags
 *
 * Five flag bits (0..4); MPTCP_DSS_FLAG_MASK (0x1F) covers exactly these.
 */
#define MPTCP_DSS_DATA_FIN	BIT(4)
#define MPTCP_DSS_DSN64		BIT(3)
#define MPTCP_DSS_HAS_MAP	BIT(2)
#define MPTCP_DSS_ACK64		BIT(1)
#define MPTCP_DSS_HAS_ACK	BIT(0)
#define MPTCP_DSS_FLAG_MASK	(0x1F)
81
3df523ab
PK
/* MPTCP ADD_ADDR flags
 *
 * MPTCP_ADDR_ECHO is a flag bit; the remaining values are plain integers
 * (an HMAC length in bytes and the IP version numbers 4 and 6).
 */
#define MPTCP_ADDR_ECHO		BIT(0)
#define MPTCP_ADDR_HMAC_LEN	20
#define MPTCP_ADDR_IPVERSION_4	4
#define MPTCP_ADDR_IPVERSION_6	6
87
/* MPTCP socket flags
 *
 * Small consecutive integers rather than masks - presumably bit numbers
 * for the 'flags' word of struct mptcp_sock; confirm against the users.
 */
#define MPTCP_DATA_READY	0
#define MPTCP_SEND_SPACE	1
#define MPTCP_WORK_RTX		2
#define MPTCP_WORK_EOF		3
6d0060f6 93
3df523ab
PK
94static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field)
95{
96 return htonl((TCPOPT_MPTCP << 24) | (len << 16) | (subopt << 12) |
97 ((nib & 0xF) << 8) | field);
98}
99
1b1c7a0e
PK
/* Path-manager address limit (not referenced elsewhere in this header). */
#define MPTCP_PM_MAX_ADDR	4
101
102struct mptcp_addr_info {
103 sa_family_t family;
104 __be16 port;
105 u8 id;
106 union {
107 struct in_addr addr;
108#if IS_ENABLED(CONFIG_MPTCP_IPV6)
109 struct in6_addr addr6;
110#endif
111 };
112};
113
/* Path-manager status values, numbered 0..2 in declaration order.
 * Presumably stored in mptcp_pm_data.status - confirm against the PM code.
 */
enum mptcp_pm_status {
	MPTCP_PM_ADD_ADDR_RECEIVED,
	MPTCP_PM_ESTABLISHED,
	MPTCP_PM_SUBFLOW_ESTABLISHED,
};
119
120struct mptcp_pm_data {
121 struct mptcp_addr_info local;
122 struct mptcp_addr_info remote;
123
124 spinlock_t lock; /*protects the whole PM data */
125
126 bool addr_signal;
127 bool server_side;
128 bool work_pending;
129 bool accept_addr;
130 bool accept_subflow;
131 u8 add_addr_signaled;
132 u8 add_addr_accepted;
133 u8 local_addr_used;
134 u8 subflows;
135 u8 add_addr_signal_max;
136 u8 add_addr_accept_max;
137 u8 local_addr_max;
138 u8 subflows_max;
139 u8 status;
140
141 struct work_struct work;
142};
143
18b683bf
PA
144struct mptcp_data_frag {
145 struct list_head list;
146 u64 data_seq;
147 int data_len;
148 int offset;
149 int overhead;
150 struct page *page;
151};
152
f870fa0b
MM
153/* MPTCP connection sock */
154struct mptcp_sock {
155 /* inet_connection_sock must be the first member */
156 struct inet_connection_sock sk;
cec37a6e
PK
157 u64 local_key;
158 u64 remote_key;
6d0060f6
MM
159 u64 write_seq;
160 u64 ack_seq;
cc9d2566 161 atomic64_t snd_una;
b51f9b80 162 unsigned long timer_ival;
79c0949e 163 u32 token;
648ef4b8 164 unsigned long flags;
d22f4988 165 bool can_ack;
ec3edaa7 166 spinlock_t join_list_lock;
80992017 167 struct work_struct work;
cec37a6e 168 struct list_head conn_list;
18b683bf 169 struct list_head rtx_queue;
ec3edaa7 170 struct list_head join_list;
6d0060f6 171 struct skb_ext *cached_ext; /* for the next sendmsg */
f870fa0b 172 struct socket *subflow; /* outgoing connect/listener/!mp_capable */
8ab183de 173 struct sock *first;
1b1c7a0e 174 struct mptcp_pm_data pm;
f870fa0b
MM
175};
176
cec37a6e
PK
/* Iterate over every entry linked on @__msk's conn_list through its 'node'
 * member, using @__subflow as the loop cursor.
 */
#define mptcp_for_each_subflow(__msk, __subflow)			\
	list_for_each_entry(__subflow, &((__msk)->conn_list), node)
179
f870fa0b
MM
/* Reinterpret @sk as its enclosing struct mptcp_sock.
 *
 * This is a plain pointer cast (it also drops the const qualifier), so the
 * caller must only pass a sock that really is an MPTCP socket.
 */
static inline struct mptcp_sock *mptcp_sk(const struct sock *sk)
{
	return (struct mptcp_sock *)sk;
}
184
18b683bf
PA
185static inline struct mptcp_data_frag *mptcp_rtx_tail(const struct sock *sk)
186{
187 struct mptcp_sock *msk = mptcp_sk(sk);
188
189 if (list_empty(&msk->rtx_queue))
190 return NULL;
191
192 return list_last_entry(&msk->rtx_queue, struct mptcp_data_frag, list);
193}
194
7948f6cc
FW
195static inline struct mptcp_data_frag *mptcp_rtx_head(const struct sock *sk)
196{
197 struct mptcp_sock *msk = mptcp_sk(sk);
198
199 if (list_empty(&msk->rtx_queue))
200 return NULL;
201
202 return list_first_entry(&msk->rtx_queue, struct mptcp_data_frag, list);
203}
204
cec37a6e
PK
205struct mptcp_subflow_request_sock {
206 struct tcp_request_sock sk;
d22f4988 207 u16 mp_capable : 1,
cec37a6e 208 mp_join : 1,
d22f4988
CP
209 backup : 1,
210 remote_key_valid : 1;
1b1c7a0e 211 u8 local_id;
f296234c 212 u8 remote_id;
cec37a6e
PK
213 u64 local_key;
214 u64 remote_key;
79c0949e
PK
215 u64 idsn;
216 u32 token;
648ef4b8 217 u32 ssn_offset;
f296234c
PK
218 u64 thmac;
219 u32 local_nonce;
220 u32 remote_nonce;
cec37a6e
PK
221};
222
/* Reinterpret @rsk as a struct mptcp_subflow_request_sock.
 *
 * Plain pointer cast (drops const); only valid for request socks that were
 * allocated as the MPTCP subflow variant.
 */
static inline struct mptcp_subflow_request_sock *
mptcp_subflow_rsk(const struct request_sock *rsk)
{
	return (struct mptcp_subflow_request_sock *)rsk;
}
228
2303f994
PK
229/* MPTCP subflow context */
230struct mptcp_subflow_context {
cec37a6e
PK
231 struct list_head node;/* conn_list of subflows */
232 u64 local_key;
233 u64 remote_key;
79c0949e 234 u64 idsn;
648ef4b8 235 u64 map_seq;
cc7972ea 236 u32 snd_isn;
79c0949e 237 u32 token;
6d0060f6 238 u32 rel_write_seq;
648ef4b8
MM
239 u32 map_subflow_seq;
240 u32 ssn_offset;
241 u32 map_data_len;
cec37a6e 242 u32 request_mptcp : 1, /* send MP_CAPABLE */
ec3edaa7
PK
243 request_join : 1, /* send MP_JOIN */
244 request_bkup : 1,
cec37a6e 245 mp_capable : 1, /* remote is MPTCP capable */
f296234c 246 mp_join : 1, /* remote is JOINing */
0be534f5 247 fully_established : 1, /* path validated */
f296234c 248 pm_notified : 1, /* PM hook called for established status */
648ef4b8
MM
249 conn_finished : 1,
250 map_valid : 1,
d22f4988 251 mpc_map : 1,
f296234c 252 backup : 1,
648ef4b8 253 data_avail : 1,
d22f4988 254 rx_eof : 1,
76c42a29 255 data_fin_tx_enable : 1,
d22f4988 256 can_ack : 1; /* only after processing the remote a key */
76c42a29 257 u64 data_fin_tx_seq;
f296234c
PK
258 u32 remote_nonce;
259 u64 thmac;
260 u32 local_nonce;
ec3edaa7
PK
261 u32 remote_token;
262 u8 hmac[MPTCPOPT_HMAC_LEN];
f296234c
PK
263 u8 local_id;
264 u8 remote_id;
648ef4b8 265
2303f994
PK
266 struct sock *tcp_sock; /* tcp sk backpointer */
267 struct sock *conn; /* parent mptcp_sock */
cec37a6e 268 const struct inet_connection_sock_af_ops *icsk_af_ops;
648ef4b8
MM
269 void (*tcp_data_ready)(struct sock *sk);
270 void (*tcp_state_change)(struct sock *sk);
271 void (*tcp_write_space)(struct sock *sk);
272
2303f994
PK
273 struct rcu_head rcu;
274};
275
276static inline struct mptcp_subflow_context *
277mptcp_subflow_ctx(const struct sock *sk)
278{
279 struct inet_connection_sock *icsk = inet_csk(sk);
280
281 /* Use RCU on icsk_ulp_data only for sock diag code */
282 return (__force struct mptcp_subflow_context *)icsk->icsk_ulp_data;
283}
284
285static inline struct sock *
286mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow)
287{
288 return subflow->tcp_sock;
289}
290
648ef4b8
MM
291static inline u64
292mptcp_subflow_get_map_offset(const struct mptcp_subflow_context *subflow)
293{
294 return tcp_sk(mptcp_subflow_tcp_sock(subflow))->copied_seq -
295 subflow->ssn_offset -
296 subflow->map_subflow_seq;
297}
298
299static inline u64
300mptcp_subflow_get_mapped_dsn(const struct mptcp_subflow_context *subflow)
301{
302 return subflow->map_seq + mptcp_subflow_get_map_offset(subflow);
303}
304
305int mptcp_is_enabled(struct net *net);
306bool mptcp_subflow_data_available(struct sock *sk);
2303f994 307void mptcp_subflow_init(void);
ec3edaa7
PK
308
309/* called with sk socket lock held */
310int __mptcp_subflow_connect(struct sock *sk, int ifindex,
311 const struct mptcp_addr_info *loc,
312 const struct mptcp_addr_info *remote);
2303f994
PK
313int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock);
314
648ef4b8
MM
315static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
316 struct mptcp_subflow_context *ctx)
317{
318 sk->sk_data_ready = ctx->tcp_data_ready;
319 sk->sk_state_change = ctx->tcp_state_change;
320 sk->sk_write_space = ctx->tcp_write_space;
321
322 inet_csk(sk)->icsk_af_ops = ctx->icsk_af_ops;
323}
324
cec37a6e
PK
325extern const struct inet_connection_sock_af_ops ipv4_specific;
326#if IS_ENABLED(CONFIG_MPTCP_IPV6)
327extern const struct inet_connection_sock_af_ops ipv6_specific;
328#endif
329
648ef4b8 330void mptcp_proto_init(void);
784325e9
MB
331#if IS_ENABLED(CONFIG_MPTCP_IPV6)
332int mptcp_proto_v6_init(void);
333#endif
648ef4b8 334
58b09919 335struct sock *mptcp_sk_clone(const struct sock *sk, struct request_sock *req);
cec37a6e
PK
336void mptcp_get_options(const struct sk_buff *skb,
337 struct tcp_options_received *opt_rx);
338
339void mptcp_finish_connect(struct sock *sk);
2e52213c 340void mptcp_data_ready(struct sock *sk, struct sock *ssk);
f296234c 341bool mptcp_finish_join(struct sock *sk);
b51f9b80 342void mptcp_data_acked(struct sock *sk);
59832e24 343void mptcp_subflow_eof(struct sock *sk);
cec37a6e 344
79c0949e
PK
345int mptcp_token_new_request(struct request_sock *req);
346void mptcp_token_destroy_request(u32 token);
347int mptcp_token_new_connect(struct sock *sk);
58b09919 348int mptcp_token_new_accept(u32 token, struct sock *conn);
f296234c 349struct mptcp_sock *mptcp_token_get_sock(u32 token);
79c0949e
PK
350void mptcp_token_destroy(u32 token);
351
352void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn);
353static inline void mptcp_crypto_key_gen_sha(u64 *key, u32 *token, u64 *idsn)
354{
355 /* we might consider a faster version that computes the key as a
356 * hash of some information available in the MPTCP socket. Use
357 * random data at the moment, as it's probably the safest option
358 * in case multiple sockets are opened in different namespaces at
359 * the same time.
360 */
361 get_random_bytes(key, sizeof(u64));
362 mptcp_crypto_key_sha(*key, token, idsn);
363}
364
3df523ab 365void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac);
79c0949e 366
1b1c7a0e
PK
367void mptcp_pm_init(void);
368void mptcp_pm_data_init(struct mptcp_sock *msk);
926bdeab 369void mptcp_pm_close(struct mptcp_sock *msk);
1b1c7a0e
PK
370void mptcp_pm_new_connection(struct mptcp_sock *msk, int server_side);
371void mptcp_pm_fully_established(struct mptcp_sock *msk);
372bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk);
373void mptcp_pm_connection_closed(struct mptcp_sock *msk);
374void mptcp_pm_subflow_established(struct mptcp_sock *msk,
375 struct mptcp_subflow_context *subflow);
376void mptcp_pm_subflow_closed(struct mptcp_sock *msk, u8 id);
377void mptcp_pm_add_addr_received(struct mptcp_sock *msk,
378 const struct mptcp_addr_info *addr);
379
380int mptcp_pm_announce_addr(struct mptcp_sock *msk,
381 const struct mptcp_addr_info *addr);
382int mptcp_pm_remove_addr(struct mptcp_sock *msk, u8 local_id);
383int mptcp_pm_remove_subflow(struct mptcp_sock *msk, u8 remote_id);
384
385static inline bool mptcp_pm_should_signal(struct mptcp_sock *msk)
386{
387 return READ_ONCE(msk->pm.addr_signal);
388}
389
390static inline unsigned int mptcp_add_addr_len(int family)
391{
392 if (family == AF_INET)
393 return TCPOLEN_MPTCP_ADD_ADDR;
394 return TCPOLEN_MPTCP_ADD_ADDR6;
395}
396
397bool mptcp_pm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
398 struct mptcp_addr_info *saddr);
399int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
400
01cacb00
PA
/* mptcp_pm_nl_* entry points (declarations only; defined elsewhere).
 * Presumably the netlink path-manager back end - confirm.
 */
void mptcp_pm_nl_init(void);
void mptcp_pm_nl_data_init(struct mptcp_sock *msk);
void mptcp_pm_nl_fully_established(struct mptcp_sock *msk);
void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk);
void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk);
int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
407
6d0060f6
MM
408static inline struct mptcp_ext *mptcp_get_ext(struct sk_buff *skb)
409{
410 return (struct mptcp_ext *)skb_ext_find(skb, SKB_EXT_MPTCP);
411}
412
648ef4b8
MM
/* Wrap-safe comparison of 64-bit sequence numbers.
 *
 * Returns true when @seq1 precedes @seq2: the unsigned difference is
 * reinterpreted as signed, so the result stays correct across a wrap of
 * the 64-bit space. Equal values return false.
 */
static inline bool before64(__u64 seq1, __u64 seq2)
{
	return (__s64)(seq1 - seq2) < 0;
}
417
/* after64(a, b) is true when a comes after b; it is before64() with the
 * arguments swapped.
 */
#define after64(seq2, seq1)	before64(seq1, seq2)
419
5147dfb5
DC
/* Declaration only; defined elsewhere. Takes the tcp_ulp_ops to set up. */
void mptcp_diag_subflow_init(struct tcp_ulp_ops *ops);
421
f870fa0b 422#endif /* __MPTCP_PROTOCOL_H */