]>
Commit | Line | Data |
---|---|---|
f870fa0b MM |
1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | /* Multipath TCP | |
3 | * | |
4 | * Copyright (c) 2017 - 2019, Intel Corporation. | |
5 | */ | |
6 | ||
7 | #ifndef __MPTCP_PROTOCOL_H | |
8 | #define __MPTCP_PROTOCOL_H | |
9 | ||
79c0949e PK |
10 | #include <linux/random.h> |
11 | #include <net/tcp.h> | |
12 | #include <net/inet_connection_sock.h> | |
13 | ||
cc7972ea | 14 | #define MPTCP_SUPPORTED_VERSION 1 |
eda7acdd PK |
15 | |
/* MPTCP option bits
 *
 * Each value is a distinct single-bit mask (bits 0-8), so any combination
 * can be OR-ed into one word.
 * NOTE(review): presumably they record which MPTCP suboptions are present
 * on a packet - confirm against the option parsing/writing code.
 */
#define OPTION_MPTCP_MPC_SYN	BIT(0)
#define OPTION_MPTCP_MPC_SYNACK	BIT(1)
#define OPTION_MPTCP_MPC_ACK	BIT(2)
#define OPTION_MPTCP_MPJ_SYN	BIT(3)
#define OPTION_MPTCP_MPJ_SYNACK	BIT(4)
#define OPTION_MPTCP_MPJ_ACK	BIT(5)
#define OPTION_MPTCP_ADD_ADDR	BIT(6)
#define OPTION_MPTCP_ADD_ADDR6	BIT(7)
#define OPTION_MPTCP_RM_ADDR	BIT(8)
eda7acdd PK |
26 | |
/* MPTCP option subtypes
 *
 * Consecutive values 0-7, all of which fit in four bits.
 * NOTE(review): presumably these are the values passed as 'subopt' to
 * mptcp_option() - confirm against callers.
 */
#define MPTCPOPT_MP_CAPABLE	0
#define MPTCPOPT_MP_JOIN	1
#define MPTCPOPT_DSS		2
#define MPTCPOPT_ADD_ADDR	3
#define MPTCPOPT_RM_ADDR	4
#define MPTCPOPT_MP_PRIO	5
#define MPTCPOPT_MP_FAIL	6
#define MPTCPOPT_MP_FASTCLOSE	7
36 | ||
/* MPTCP suboption lengths
 *
 * NOTE(review): presumably byte counts of the encoded options - confirm
 * against the option writing code.
 *
 * Arithmetic relations visible in the values below:
 *  - every *_PORT length equals its non-port counterpart plus
 *    TCPOLEN_MPTCP_PORT_LEN (2);
 *  - TCPOLEN_MPTCP_ADD_ADDR and TCPOLEN_MPTCP_ADD_ADDR6 each equal their
 *    *_BASE length plus 8.
 */
#define TCPOLEN_MPTCP_MPC_SYN		4
#define TCPOLEN_MPTCP_MPC_SYNACK	12
#define TCPOLEN_MPTCP_MPC_ACK		20
#define TCPOLEN_MPTCP_MPC_ACK_DATA	22
#define TCPOLEN_MPTCP_MPJ_SYN		12
#define TCPOLEN_MPTCP_MPJ_SYNACK	16
#define TCPOLEN_MPTCP_MPJ_ACK		24
#define TCPOLEN_MPTCP_DSS_BASE		4
#define TCPOLEN_MPTCP_DSS_ACK32		4
#define TCPOLEN_MPTCP_DSS_ACK64		8
#define TCPOLEN_MPTCP_DSS_MAP32		10
#define TCPOLEN_MPTCP_DSS_MAP64		14
#define TCPOLEN_MPTCP_DSS_CHECKSUM	2
#define TCPOLEN_MPTCP_ADD_ADDR		16
#define TCPOLEN_MPTCP_ADD_ADDR_PORT	18
#define TCPOLEN_MPTCP_ADD_ADDR_BASE	8
#define TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT	10
#define TCPOLEN_MPTCP_ADD_ADDR6		28
#define TCPOLEN_MPTCP_ADD_ADDR6_PORT	30
#define TCPOLEN_MPTCP_ADD_ADDR6_BASE	20
#define TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT	22
#define TCPOLEN_MPTCP_PORT_LEN		2
#define TCPOLEN_MPTCP_RM_ADDR_BASE	4
eda7acdd | 61 | |
/* MPTCP MP_JOIN flags */
#define MPTCPOPT_BACKUP		BIT(0)
/* Size of the hmac[] array in struct mptcp_subflow_context */
#define MPTCPOPT_HMAC_LEN	20
/* NOTE(review): presumably the length of the truncated HMAC carried in the
 * 64-bit 'thmac' fields (8 == sizeof(u64)) - confirm against users.
 */
#define MPTCPOPT_THMAC_LEN	8
f296234c | 66 | |
eda7acdd PK |
/* MPTCP MP_CAPABLE flags */
#define MPTCP_VERSION_MASK	(0x0F)	/* low four bits */
#define MPTCP_CAP_CHECKSUM_REQD	BIT(7)
#define MPTCP_CAP_EXTENSIBILITY	BIT(6)
#define MPTCP_CAP_HMAC_SHA256	BIT(0)
/* Covers bits 0-5 only: MPTCP_CAP_CHECKSUM_REQD (bit 7) and
 * MPTCP_CAP_EXTENSIBILITY (bit 6) fall outside this mask.
 */
#define MPTCP_CAP_FLAG_MASK	(0x3F)
73 | ||
6d0060f6 MM |
/* MPTCP DSS flags */
#define MPTCP_DSS_DATA_FIN	BIT(4)
#define MPTCP_DSS_DSN64		BIT(3)
#define MPTCP_DSS_HAS_MAP	BIT(2)
#define MPTCP_DSS_ACK64		BIT(1)
#define MPTCP_DSS_HAS_ACK	BIT(0)
/* Bits 0-4, i.e. exactly the five DSS flags defined above */
#define MPTCP_DSS_FLAG_MASK	(0x1F)
81 | ||
3df523ab PK |
/* MPTCP ADD_ADDR flags */
#define MPTCP_ADDR_ECHO		BIT(0)
#define MPTCP_ADDR_HMAC_LEN	20
/* IP version numbers: 4 for IPv4, 6 for IPv6 */
#define MPTCP_ADDR_IPVERSION_4	4
#define MPTCP_ADDR_IPVERSION_6	6
87 | ||
/* MPTCP socket flags
 *
 * NOTE(review): these are small consecutive integers rather than BIT()
 * masks; presumably they are bit numbers within mptcp_sock.flags
 * (an unsigned long) - confirm against users.
 */
#define MPTCP_DATA_READY	0
#define MPTCP_SEND_SPACE	1
#define MPTCP_WORK_RTX		2
#define MPTCP_WORK_EOF		3
6d0060f6 | 93 | |
3df523ab PK |
94 | static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field) |
95 | { | |
96 | return htonl((TCPOPT_MPTCP << 24) | (len << 16) | (subopt << 12) | | |
97 | ((nib & 0xF) << 8) | field); | |
98 | } | |
99 | ||
1b1c7a0e PK |
/* NOTE(review): not referenced elsewhere in this header; presumably an upper
 * bound on addresses handled by the path manager - confirm against users.
 */
#define MPTCP_PM_MAX_ADDR	4
101 | ||
/* Address description used by struct mptcp_pm_data and by the mptcp_pm_*
 * helpers declared in this header.
 *
 * The anonymous union holds an IPv4 address or, when CONFIG_MPTCP_IPV6 is
 * enabled, an IPv6 address.
 * NOTE(review): presumably 'family' tells which union member is valid -
 * confirm against users.
 */
struct mptcp_addr_info {
	sa_family_t		family;
	__be16			port;	/* big-endian, per the __be16 type */
	u8			id;
	union {
		struct in_addr addr;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
		struct in6_addr addr6;
#endif
	};
};
113 | ||
/* Path manager status values.
 * NOTE(review): presumably stored in (or used as bit numbers within) the u8
 * mptcp_pm_data.status field - confirm against the PM code.
 */
enum mptcp_pm_status {
	MPTCP_PM_ADD_ADDR_RECEIVED,
	MPTCP_PM_ESTABLISHED,
	MPTCP_PM_SUBFLOW_ESTABLISHED,
};
119 | ||
/* Per-connection path manager (PM) state; embedded in struct mptcp_sock
 * as the 'pm' member.
 */
struct mptcp_pm_data {
	struct mptcp_addr_info local;
	struct mptcp_addr_info remote;

	spinlock_t	lock;		/* protects the whole PM data */

	bool		addr_signal;	/* read locklessly, via READ_ONCE(), by
					 * mptcp_pm_should_signal()
					 */
	bool		server_side;
	bool		work_pending;
	bool		accept_addr;
	bool		accept_subflow;
	/* NOTE(review): each of the four counters below has a matching *_max
	 * field further down; presumably the counter is bounded by it -
	 * confirm against the PM code.
	 */
	u8		add_addr_signaled;
	u8		add_addr_accepted;
	u8		local_addr_used;
	u8		subflows;
	u8		add_addr_signal_max;
	u8		add_addr_accept_max;
	u8		local_addr_max;
	u8		subflows_max;
	u8		status;

	struct		work_struct work;
};
143 | ||
18b683bf PA |
/* One entry of mptcp_sock.rtx_queue; mptcp_rtx_head() and mptcp_rtx_tail()
 * return the first and last entry of that list.
 */
struct mptcp_data_frag {
	struct list_head list;	/* linkage into mptcp_sock.rtx_queue */
	u64 data_seq;
	int data_len;
	int offset;
	int overhead;
	struct page *page;
};
152 | ||
f870fa0b MM |
/* MPTCP connection sock */
struct mptcp_sock {
	/* inet_connection_sock must be the first member: mptcp_sk() casts a
	 * struct sock pointer directly to struct mptcp_sock.
	 */
	struct inet_connection_sock sk;
	u64		local_key;
	u64		remote_key;
	u64		write_seq;
	u64		ack_seq;
	atomic64_t	snd_una;
	unsigned long	timer_ival;
	u32		token;
	unsigned long	flags;	/* NOTE(review): presumably holds the
				 * MPTCP_DATA_READY..MPTCP_WORK_EOF flags -
				 * confirm
				 */
	bool		can_ack;
	spinlock_t	join_list_lock;
	struct work_struct work;
	struct list_head conn_list;	/* walked by mptcp_for_each_subflow()
					 * through mptcp_subflow_context.node
					 */
	struct list_head rtx_queue;	/* list of struct mptcp_data_frag */
	struct list_head join_list;
	struct skb_ext	*cached_ext;	/* for the next sendmsg */
	struct socket	*subflow; /* outgoing connect/listener/!mp_capable */
	struct sock	*first;
	struct mptcp_pm_data	pm;	/* path manager state */
};
176 | ||
cec37a6e PK |
/* Iterate over every entry linked on @__msk->conn_list.
 * @__subflow is the loop cursor; entries are linked through their 'node'
 * member (see struct mptcp_subflow_context).
 */
#define mptcp_for_each_subflow(__msk, __subflow)			\
	list_for_each_entry(__subflow, &((__msk)->conn_list), node)
179 | ||
f870fa0b MM |
/* View a struct sock as the struct mptcp_sock that contains it.
 * This is a plain pointer cast (which also drops the const qualifier); it
 * relies on the socket being the first member of struct mptcp_sock.
 */
static inline struct mptcp_sock *mptcp_sk(const struct sock *sk)
{
	struct mptcp_sock *msk = (struct mptcp_sock *)sk;

	return msk;
}
184 | ||
18b683bf PA |
185 | static inline struct mptcp_data_frag *mptcp_rtx_tail(const struct sock *sk) |
186 | { | |
187 | struct mptcp_sock *msk = mptcp_sk(sk); | |
188 | ||
189 | if (list_empty(&msk->rtx_queue)) | |
190 | return NULL; | |
191 | ||
192 | return list_last_entry(&msk->rtx_queue, struct mptcp_data_frag, list); | |
193 | } | |
194 | ||
7948f6cc FW |
195 | static inline struct mptcp_data_frag *mptcp_rtx_head(const struct sock *sk) |
196 | { | |
197 | struct mptcp_sock *msk = mptcp_sk(sk); | |
198 | ||
199 | if (list_empty(&msk->rtx_queue)) | |
200 | return NULL; | |
201 | ||
202 | return list_first_entry(&msk->rtx_queue, struct mptcp_data_frag, list); | |
203 | } | |
204 | ||
cec37a6e PK |
/* MPTCP view of a request sock.
 * 'sk' must stay the first member: mptcp_subflow_rsk() casts a
 * struct request_sock pointer directly to this type.
 */
struct mptcp_subflow_request_sock {
	struct	tcp_request_sock sk;
	u16	mp_capable : 1,
		mp_join : 1,
		backup : 1,
		remote_key_valid : 1;
	u8	local_id;
	u8	remote_id;
	u64	local_key;
	u64	remote_key;
	u64	idsn;
	u32	token;
	u32	ssn_offset;
	u64	thmac;
	u32	local_nonce;
	u32	remote_nonce;
};
222 | ||
/* View a request sock as the struct mptcp_subflow_request_sock that
 * contains it. Plain pointer cast; the const qualifier is dropped.
 */
static inline struct mptcp_subflow_request_sock *
mptcp_subflow_rsk(const struct request_sock *rsk)
{
	struct mptcp_subflow_request_sock *subflow_req;

	subflow_req = (struct mptcp_subflow_request_sock *)rsk;
	return subflow_req;
}
228 | ||
2303f994 PK |
/* MPTCP subflow context
 *
 * Reached from a subflow socket through icsk_ulp_data, see
 * mptcp_subflow_ctx().
 */
struct mptcp_subflow_context {
	struct	list_head node;/* conn_list of subflows */
	u64	local_key;
	u64	remote_key;
	u64	idsn;
	u64	map_seq;	/* base used by mptcp_subflow_get_mapped_dsn() */
	u32	snd_isn;
	u32	token;
	u32	rel_write_seq;
	u32	map_subflow_seq;  /* subtracted from copied_seq, together with
				   * ssn_offset, to compute the map offset
				   */
	u32	ssn_offset;
	u32	map_data_len;
	u32	request_mptcp : 1,  /* send MP_CAPABLE */
		request_join : 1,   /* send MP_JOIN */
		request_bkup : 1,
		mp_capable : 1,	    /* remote is MPTCP capable */
		mp_join : 1,	    /* remote is JOINing */
		fully_established : 1,	    /* path validated */
		pm_notified : 1,    /* PM hook called for established status */
		conn_finished : 1,
		map_valid : 1,
		mpc_map : 1,
		backup : 1,
		data_avail : 1,
		rx_eof : 1,
		data_fin_tx_enable : 1,
		can_ack : 1;	    /* only after processing the remote key */
	u64	data_fin_tx_seq;
	u32	remote_nonce;
	u64	thmac;
	u32	local_nonce;
	u32	remote_token;
	u8	hmac[MPTCPOPT_HMAC_LEN];
	u8	local_id;
	u8	remote_id;

	struct	sock *tcp_sock;	    /* tcp sk backpointer */
	struct	sock *conn;	    /* parent mptcp_sock */
	/* The four pointers below are written back into the TCP socket by
	 * mptcp_subflow_tcp_fallback().
	 */
	const	struct inet_connection_sock_af_ops *icsk_af_ops;
	void	(*tcp_data_ready)(struct sock *sk);
	void	(*tcp_state_change)(struct sock *sk);
	void	(*tcp_write_space)(struct sock *sk);

	struct	rcu_head rcu;
};
275 | ||
276 | static inline struct mptcp_subflow_context * | |
277 | mptcp_subflow_ctx(const struct sock *sk) | |
278 | { | |
279 | struct inet_connection_sock *icsk = inet_csk(sk); | |
280 | ||
281 | /* Use RCU on icsk_ulp_data only for sock diag code */ | |
282 | return (__force struct mptcp_subflow_context *)icsk->icsk_ulp_data; | |
283 | } | |
284 | ||
285 | static inline struct sock * | |
286 | mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow) | |
287 | { | |
288 | return subflow->tcp_sock; | |
289 | } | |
290 | ||
648ef4b8 MM |
291 | static inline u64 |
292 | mptcp_subflow_get_map_offset(const struct mptcp_subflow_context *subflow) | |
293 | { | |
294 | return tcp_sk(mptcp_subflow_tcp_sock(subflow))->copied_seq - | |
295 | subflow->ssn_offset - | |
296 | subflow->map_subflow_seq; | |
297 | } | |
298 | ||
299 | static inline u64 | |
300 | mptcp_subflow_get_mapped_dsn(const struct mptcp_subflow_context *subflow) | |
301 | { | |
302 | return subflow->map_seq + mptcp_subflow_get_map_offset(subflow); | |
303 | } | |
304 | ||
/* Declarations only; the definitions are not part of this header. */
int mptcp_is_enabled(struct net *net);
bool mptcp_subflow_data_available(struct sock *sk);
void mptcp_subflow_init(void);

/* called with sk socket lock held */
int __mptcp_subflow_connect(struct sock *sk, int ifindex,
			    const struct mptcp_addr_info *loc,
			    const struct mptcp_addr_info *remote);
/* @new_sock is an output parameter (pointer to a struct socket pointer) */
int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock);
314 | ||
648ef4b8 MM |
315 | static inline void mptcp_subflow_tcp_fallback(struct sock *sk, |
316 | struct mptcp_subflow_context *ctx) | |
317 | { | |
318 | sk->sk_data_ready = ctx->tcp_data_ready; | |
319 | sk->sk_state_change = ctx->tcp_state_change; | |
320 | sk->sk_write_space = ctx->tcp_write_space; | |
321 | ||
322 | inet_csk(sk)->icsk_af_ops = ctx->icsk_af_ops; | |
323 | } | |
324 | ||
cec37a6e PK |
/* af_ops tables defined outside this header; the IPv6 one is only declared
 * when CONFIG_MPTCP_IPV6 is enabled.
 */
extern const struct inet_connection_sock_af_ops ipv4_specific;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
extern const struct inet_connection_sock_af_ops ipv6_specific;
#endif

/* Protocol initialisation; the v6 variant is only declared when
 * CONFIG_MPTCP_IPV6 is enabled and, unlike the base one, returns an int.
 */
void mptcp_proto_init(void);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
int mptcp_proto_v6_init(void);
#endif

struct sock *mptcp_sk_clone(const struct sock *sk, struct request_sock *req);
void mptcp_get_options(const struct sk_buff *skb,
		       struct tcp_options_received *opt_rx);

void mptcp_finish_connect(struct sock *sk);
void mptcp_data_ready(struct sock *sk, struct sock *ssk);
bool mptcp_finish_join(struct sock *sk);
void mptcp_data_acked(struct sock *sk);
void mptcp_subflow_eof(struct sock *sk);

/* Token management (tokens are u32 values) */
int mptcp_token_new_request(struct request_sock *req);
void mptcp_token_destroy_request(u32 token);
int mptcp_token_new_connect(struct sock *sk);
int mptcp_token_new_accept(u32 token, struct sock *conn);
struct mptcp_sock *mptcp_token_get_sock(u32 token);
void mptcp_token_destroy(u32 token);

/* Called by mptcp_crypto_key_gen_sha() with the freshly generated key;
 * @token and @idsn are output pointers.
 */
void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn);
353 | static inline void mptcp_crypto_key_gen_sha(u64 *key, u32 *token, u64 *idsn) | |
354 | { | |
355 | /* we might consider a faster version that computes the key as a | |
356 | * hash of some information available in the MPTCP socket. Use | |
357 | * random data at the moment, as it's probably the safest option | |
358 | * in case multiple sockets are opened in different namespaces at | |
359 | * the same time. | |
360 | */ | |
361 | get_random_bytes(key, sizeof(u64)); | |
362 | mptcp_crypto_key_sha(*key, token, idsn); | |
363 | } | |
364 | ||
/* @hmac is an output buffer whose size is not expressed in the prototype.
 * NOTE(review): confirm the required size against the implementation.
 */
void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac);

/* Path manager (PM) interface; declarations only */
void mptcp_pm_init(void);
void mptcp_pm_data_init(struct mptcp_sock *msk);
void mptcp_pm_close(struct mptcp_sock *msk);
void mptcp_pm_new_connection(struct mptcp_sock *msk, int server_side);
void mptcp_pm_fully_established(struct mptcp_sock *msk);
bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk);
void mptcp_pm_connection_closed(struct mptcp_sock *msk);
void mptcp_pm_subflow_established(struct mptcp_sock *msk,
				  struct mptcp_subflow_context *subflow);
void mptcp_pm_subflow_closed(struct mptcp_sock *msk, u8 id);
void mptcp_pm_add_addr_received(struct mptcp_sock *msk,
				const struct mptcp_addr_info *addr);

int mptcp_pm_announce_addr(struct mptcp_sock *msk,
			   const struct mptcp_addr_info *addr);
int mptcp_pm_remove_addr(struct mptcp_sock *msk, u8 local_id);
int mptcp_pm_remove_subflow(struct mptcp_sock *msk, u8 remote_id);
384 | ||
385 | static inline bool mptcp_pm_should_signal(struct mptcp_sock *msk) | |
386 | { | |
387 | return READ_ONCE(msk->pm.addr_signal); | |
388 | } | |
389 | ||
390 | static inline unsigned int mptcp_add_addr_len(int family) | |
391 | { | |
392 | if (family == AF_INET) | |
393 | return TCPOLEN_MPTCP_ADD_ADDR; | |
394 | return TCPOLEN_MPTCP_ADD_ADDR6; | |
395 | } | |
396 | ||
/* @saddr is a non-const pointer.
 * NOTE(review): presumably filled in on success - confirm against the
 * implementation.
 */
bool mptcp_pm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
			  struct mptcp_addr_info *saddr);
int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);

/* mptcp_pm_nl_* entry points; declarations only.
 * NOTE(review): presumably the netlink path manager backend - confirm.
 */
void mptcp_pm_nl_init(void);
void mptcp_pm_nl_data_init(struct mptcp_sock *msk);
void mptcp_pm_nl_fully_established(struct mptcp_sock *msk);
void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk);
void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk);
int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
407 | ||
6d0060f6 MM |
408 | static inline struct mptcp_ext *mptcp_get_ext(struct sk_buff *skb) |
409 | { | |
410 | return (struct mptcp_ext *)skb_ext_find(skb, SKB_EXT_MPTCP); | |
411 | } | |
412 | ||
648ef4b8 MM |
/* Wrap-around aware comparison of 64-bit sequence numbers: returns true
 * when the unsigned difference @seq1 - @seq2, reinterpreted as a signed
 * 64-bit value, is negative.
 */
static inline bool before64(__u64 seq1, __u64 seq2)
{
	__s64 delta = (__s64)(seq1 - seq2);

	return delta < 0;
}
417 | ||
/* after64(a, b) is before64(b, a): the arguments are simply swapped */
#define after64(seq2, seq1)	before64(seq1, seq2)
419 | ||
5147dfb5 DC |
/* Declaration only.
 * NOTE(review): presumably installs diag callbacks into @ops - confirm
 * against the definition.
 */
void mptcp_diag_subflow_init(struct tcp_ulp_ops *ops);
421 | ||
f870fa0b | 422 | #endif /* __MPTCP_PROTOCOL_H */ |