]>
Commit | Line | Data |
---|---|---|
eda7acdd PK |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* Multipath TCP | |
3 | * | |
4 | * Copyright (c) 2017 - 2019, Intel Corporation. | |
5 | */ | |
6 | ||
7 | #include <linux/kernel.h> | |
8 | #include <net/tcp.h> | |
9 | #include <net/mptcp.h> | |
10 | #include "protocol.h" | |
11 | ||
65492c5a PA |
12 | static bool mptcp_cap_flag_sha256(u8 flags) |
13 | { | |
14 | return (flags & MPTCP_CAP_FLAG_MASK) == MPTCP_CAP_HMAC_SHA256; | |
15 | } | |
16 | ||
eda7acdd PK |
17 | void mptcp_parse_option(const unsigned char *ptr, int opsize, |
18 | struct tcp_options_received *opt_rx) | |
19 | { | |
20 | struct mptcp_options_received *mp_opt = &opt_rx->mptcp; | |
21 | u8 subtype = *ptr >> 4; | |
648ef4b8 | 22 | int expected_opsize; |
eda7acdd PK |
23 | u8 version; |
24 | u8 flags; | |
25 | ||
26 | switch (subtype) { | |
27 | case MPTCPOPT_MP_CAPABLE: | |
28 | if (opsize != TCPOLEN_MPTCP_MPC_SYN && | |
29 | opsize != TCPOLEN_MPTCP_MPC_ACK) | |
30 | break; | |
31 | ||
32 | version = *ptr++ & MPTCP_VERSION_MASK; | |
33 | if (version != MPTCP_SUPPORTED_VERSION) | |
34 | break; | |
35 | ||
36 | flags = *ptr++; | |
65492c5a | 37 | if (!mptcp_cap_flag_sha256(flags) || |
eda7acdd PK |
38 | (flags & MPTCP_CAP_EXTENSIBILITY)) |
39 | break; | |
40 | ||
41 | /* RFC 6824, Section 3.1: | |
42 | * "For the Checksum Required bit (labeled "A"), if either | |
43 | * host requires the use of checksums, checksums MUST be used. | |
44 | * In other words, the only way for checksums not to be used | |
45 | * is if both hosts in their SYNs set A=0." | |
46 | * | |
47 | * Section 3.3.0: | |
48 | * "If a checksum is not present when its use has been | |
49 | * negotiated, the receiver MUST close the subflow with a RST as | |
50 | * it is considered broken." | |
51 | * | |
52 | * We don't implement DSS checksum - fall back to TCP. | |
53 | */ | |
54 | if (flags & MPTCP_CAP_CHECKSUM_REQD) | |
55 | break; | |
56 | ||
57 | mp_opt->mp_capable = 1; | |
58 | mp_opt->sndr_key = get_unaligned_be64(ptr); | |
59 | ptr += 8; | |
60 | ||
61 | if (opsize == TCPOLEN_MPTCP_MPC_ACK) { | |
62 | mp_opt->rcvr_key = get_unaligned_be64(ptr); | |
63 | ptr += 8; | |
64 | pr_debug("MP_CAPABLE sndr=%llu, rcvr=%llu", | |
65 | mp_opt->sndr_key, mp_opt->rcvr_key); | |
66 | } else { | |
67 | pr_debug("MP_CAPABLE sndr=%llu", mp_opt->sndr_key); | |
68 | } | |
69 | break; | |
70 | ||
71 | case MPTCPOPT_DSS: | |
72 | pr_debug("DSS"); | |
648ef4b8 MM |
73 | ptr++; |
74 | ||
75 | flags = (*ptr++) & MPTCP_DSS_FLAG_MASK; | |
76 | mp_opt->data_fin = (flags & MPTCP_DSS_DATA_FIN) != 0; | |
77 | mp_opt->dsn64 = (flags & MPTCP_DSS_DSN64) != 0; | |
78 | mp_opt->use_map = (flags & MPTCP_DSS_HAS_MAP) != 0; | |
79 | mp_opt->ack64 = (flags & MPTCP_DSS_ACK64) != 0; | |
80 | mp_opt->use_ack = (flags & MPTCP_DSS_HAS_ACK); | |
81 | ||
82 | pr_debug("data_fin=%d dsn64=%d use_map=%d ack64=%d use_ack=%d", | |
83 | mp_opt->data_fin, mp_opt->dsn64, | |
84 | mp_opt->use_map, mp_opt->ack64, | |
85 | mp_opt->use_ack); | |
86 | ||
87 | expected_opsize = TCPOLEN_MPTCP_DSS_BASE; | |
88 | ||
89 | if (mp_opt->use_ack) { | |
90 | if (mp_opt->ack64) | |
91 | expected_opsize += TCPOLEN_MPTCP_DSS_ACK64; | |
92 | else | |
93 | expected_opsize += TCPOLEN_MPTCP_DSS_ACK32; | |
94 | } | |
95 | ||
96 | if (mp_opt->use_map) { | |
97 | if (mp_opt->dsn64) | |
98 | expected_opsize += TCPOLEN_MPTCP_DSS_MAP64; | |
99 | else | |
100 | expected_opsize += TCPOLEN_MPTCP_DSS_MAP32; | |
101 | } | |
102 | ||
103 | /* RFC 6824, Section 3.3: | |
104 | * If a checksum is present, but its use had | |
105 | * not been negotiated in the MP_CAPABLE handshake, | |
106 | * the checksum field MUST be ignored. | |
107 | */ | |
108 | if (opsize != expected_opsize && | |
109 | opsize != expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM) | |
110 | break; | |
111 | ||
eda7acdd | 112 | mp_opt->dss = 1; |
648ef4b8 MM |
113 | |
114 | if (mp_opt->use_ack) { | |
115 | if (mp_opt->ack64) { | |
116 | mp_opt->data_ack = get_unaligned_be64(ptr); | |
117 | ptr += 8; | |
118 | } else { | |
119 | mp_opt->data_ack = get_unaligned_be32(ptr); | |
120 | ptr += 4; | |
121 | } | |
122 | ||
123 | pr_debug("data_ack=%llu", mp_opt->data_ack); | |
124 | } | |
125 | ||
126 | if (mp_opt->use_map) { | |
127 | if (mp_opt->dsn64) { | |
128 | mp_opt->data_seq = get_unaligned_be64(ptr); | |
129 | ptr += 8; | |
130 | } else { | |
131 | mp_opt->data_seq = get_unaligned_be32(ptr); | |
132 | ptr += 4; | |
133 | } | |
134 | ||
135 | mp_opt->subflow_seq = get_unaligned_be32(ptr); | |
136 | ptr += 4; | |
137 | ||
138 | mp_opt->data_len = get_unaligned_be16(ptr); | |
139 | ptr += 2; | |
140 | ||
141 | pr_debug("data_seq=%llu subflow_seq=%u data_len=%u", | |
142 | mp_opt->data_seq, mp_opt->subflow_seq, | |
143 | mp_opt->data_len); | |
144 | } | |
145 | ||
eda7acdd PK |
146 | break; |
147 | ||
148 | default: | |
149 | break; | |
150 | } | |
151 | } | |
152 | ||
cec37a6e PK |
153 | void mptcp_get_options(const struct sk_buff *skb, |
154 | struct tcp_options_received *opt_rx) | |
155 | { | |
156 | const unsigned char *ptr; | |
157 | const struct tcphdr *th = tcp_hdr(skb); | |
158 | int length = (th->doff * 4) - sizeof(struct tcphdr); | |
159 | ||
160 | ptr = (const unsigned char *)(th + 1); | |
161 | ||
162 | while (length > 0) { | |
163 | int opcode = *ptr++; | |
164 | int opsize; | |
165 | ||
166 | switch (opcode) { | |
167 | case TCPOPT_EOL: | |
168 | return; | |
169 | case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ | |
170 | length--; | |
171 | continue; | |
172 | default: | |
173 | opsize = *ptr++; | |
174 | if (opsize < 2) /* "silly options" */ | |
175 | return; | |
176 | if (opsize > length) | |
177 | return; /* don't parse partial options */ | |
178 | if (opcode == TCPOPT_MPTCP) | |
179 | mptcp_parse_option(ptr, opsize, opt_rx); | |
180 | ptr += opsize - 2; | |
181 | length -= opsize; | |
182 | } | |
183 | } | |
184 | } | |
185 | ||
186 | bool mptcp_syn_options(struct sock *sk, unsigned int *size, | |
187 | struct mptcp_out_options *opts) | |
188 | { | |
189 | struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); | |
190 | ||
191 | if (subflow->request_mptcp) { | |
192 | pr_debug("local_key=%llu", subflow->local_key); | |
193 | opts->suboptions = OPTION_MPTCP_MPC_SYN; | |
194 | opts->sndr_key = subflow->local_key; | |
195 | *size = TCPOLEN_MPTCP_MPC_SYN; | |
196 | return true; | |
197 | } | |
198 | return false; | |
199 | } | |
200 | ||
201 | void mptcp_rcv_synsent(struct sock *sk) | |
202 | { | |
203 | struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); | |
204 | struct tcp_sock *tp = tcp_sk(sk); | |
205 | ||
206 | pr_debug("subflow=%p", subflow); | |
207 | if (subflow->request_mptcp && tp->rx_opt.mptcp.mp_capable) { | |
208 | subflow->mp_capable = 1; | |
209 | subflow->remote_key = tp->rx_opt.mptcp.sndr_key; | |
210 | } else { | |
211 | tcp_sk(sk)->is_mptcp = 0; | |
212 | } | |
213 | } | |
214 | ||
6d0060f6 MM |
215 | static bool mptcp_established_options_mp(struct sock *sk, unsigned int *size, |
216 | unsigned int remaining, | |
217 | struct mptcp_out_options *opts) | |
cec37a6e PK |
218 | { |
219 | struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); | |
220 | ||
6d0060f6 | 221 | if (!subflow->fourth_ack) { |
cec37a6e PK |
222 | opts->suboptions = OPTION_MPTCP_MPC_ACK; |
223 | opts->sndr_key = subflow->local_key; | |
224 | opts->rcvr_key = subflow->remote_key; | |
225 | *size = TCPOLEN_MPTCP_MPC_ACK; | |
226 | subflow->fourth_ack = 1; | |
227 | pr_debug("subflow=%p, local_key=%llu, remote_key=%llu", | |
228 | subflow, subflow->local_key, subflow->remote_key); | |
229 | return true; | |
230 | } | |
231 | return false; | |
232 | } | |
233 | ||
6d0060f6 MM |
234 | static void mptcp_write_data_fin(struct mptcp_subflow_context *subflow, |
235 | struct mptcp_ext *ext) | |
236 | { | |
237 | ext->data_fin = 1; | |
238 | ||
239 | if (!ext->use_map) { | |
240 | /* RFC6824 requires a DSS mapping with specific values | |
241 | * if DATA_FIN is set but no data payload is mapped | |
242 | */ | |
243 | ext->use_map = 1; | |
244 | ext->dsn64 = 1; | |
245 | ext->data_seq = mptcp_sk(subflow->conn)->write_seq; | |
246 | ext->subflow_seq = 0; | |
247 | ext->data_len = 1; | |
248 | } else { | |
249 | /* If there's an existing DSS mapping, DATA_FIN consumes | |
250 | * 1 additional byte of mapping space. | |
251 | */ | |
252 | ext->data_len++; | |
253 | } | |
254 | } | |
255 | ||
256 | static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, | |
257 | unsigned int *size, | |
258 | unsigned int remaining, | |
259 | struct mptcp_out_options *opts) | |
260 | { | |
261 | struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); | |
262 | unsigned int dss_size = 0; | |
263 | struct mptcp_ext *mpext; | |
264 | struct mptcp_sock *msk; | |
265 | unsigned int ack_size; | |
266 | u8 tcp_fin; | |
267 | ||
268 | if (skb) { | |
269 | mpext = mptcp_get_ext(skb); | |
270 | tcp_fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN; | |
271 | } else { | |
272 | mpext = NULL; | |
273 | tcp_fin = 0; | |
274 | } | |
275 | ||
276 | if (!skb || (mpext && mpext->use_map) || tcp_fin) { | |
277 | unsigned int map_size; | |
278 | ||
279 | map_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64; | |
280 | ||
281 | remaining -= map_size; | |
282 | dss_size = map_size; | |
283 | if (mpext) | |
284 | opts->ext_copy = *mpext; | |
285 | ||
286 | if (skb && tcp_fin && | |
287 | subflow->conn->sk_state != TCP_ESTABLISHED) | |
288 | mptcp_write_data_fin(subflow, &opts->ext_copy); | |
289 | } | |
290 | ||
291 | ack_size = TCPOLEN_MPTCP_DSS_ACK64; | |
292 | ||
293 | /* Add kind/length/subtype/flag overhead if mapping is not populated */ | |
294 | if (dss_size == 0) | |
295 | ack_size += TCPOLEN_MPTCP_DSS_BASE; | |
296 | ||
297 | dss_size += ack_size; | |
298 | ||
299 | msk = mptcp_sk(mptcp_subflow_ctx(sk)->conn); | |
300 | if (msk) { | |
301 | opts->ext_copy.data_ack = msk->ack_seq; | |
302 | } else { | |
303 | mptcp_crypto_key_sha(mptcp_subflow_ctx(sk)->remote_key, | |
304 | NULL, &opts->ext_copy.data_ack); | |
305 | opts->ext_copy.data_ack++; | |
306 | } | |
307 | ||
308 | opts->ext_copy.ack64 = 1; | |
309 | opts->ext_copy.use_ack = 1; | |
310 | ||
311 | *size = ALIGN(dss_size, 4); | |
312 | return true; | |
313 | } | |
314 | ||
315 | bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, | |
316 | unsigned int *size, unsigned int remaining, | |
317 | struct mptcp_out_options *opts) | |
318 | { | |
319 | unsigned int opt_size = 0; | |
320 | bool ret = false; | |
321 | ||
322 | if (mptcp_established_options_mp(sk, &opt_size, remaining, opts)) | |
323 | ret = true; | |
324 | else if (mptcp_established_options_dss(sk, skb, &opt_size, remaining, | |
325 | opts)) | |
326 | ret = true; | |
327 | ||
328 | /* we reserved enough space for the above options, and exceeding the | |
329 | * TCP option space would be fatal | |
330 | */ | |
331 | if (WARN_ON_ONCE(opt_size > remaining)) | |
332 | return false; | |
333 | ||
334 | *size += opt_size; | |
335 | remaining -= opt_size; | |
336 | ||
337 | return ret; | |
338 | } | |
339 | ||
cec37a6e PK |
340 | bool mptcp_synack_options(const struct request_sock *req, unsigned int *size, |
341 | struct mptcp_out_options *opts) | |
342 | { | |
343 | struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); | |
344 | ||
345 | if (subflow_req->mp_capable) { | |
346 | opts->suboptions = OPTION_MPTCP_MPC_SYNACK; | |
347 | opts->sndr_key = subflow_req->local_key; | |
348 | *size = TCPOLEN_MPTCP_MPC_SYNACK; | |
349 | pr_debug("subflow_req=%p, local_key=%llu", | |
350 | subflow_req, subflow_req->local_key); | |
351 | return true; | |
352 | } | |
353 | return false; | |
354 | } | |
355 | ||
648ef4b8 MM |
356 | void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb, |
357 | struct tcp_options_received *opt_rx) | |
358 | { | |
359 | struct mptcp_options_received *mp_opt; | |
360 | struct mptcp_ext *mpext; | |
361 | ||
362 | mp_opt = &opt_rx->mptcp; | |
363 | ||
364 | if (!mp_opt->dss) | |
365 | return; | |
366 | ||
367 | mpext = skb_ext_add(skb, SKB_EXT_MPTCP); | |
368 | if (!mpext) | |
369 | return; | |
370 | ||
371 | memset(mpext, 0, sizeof(*mpext)); | |
372 | ||
373 | if (mp_opt->use_map) { | |
374 | mpext->data_seq = mp_opt->data_seq; | |
375 | mpext->subflow_seq = mp_opt->subflow_seq; | |
376 | mpext->data_len = mp_opt->data_len; | |
377 | mpext->use_map = 1; | |
378 | mpext->dsn64 = mp_opt->dsn64; | |
379 | } | |
380 | ||
381 | if (mp_opt->use_ack) { | |
382 | mpext->data_ack = mp_opt->data_ack; | |
383 | mpext->use_ack = 1; | |
384 | mpext->ack64 = mp_opt->ack64; | |
385 | } | |
386 | ||
387 | mpext->data_fin = mp_opt->data_fin; | |
388 | } | |
389 | ||
eda7acdd PK |
390 | void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts) |
391 | { | |
392 | if ((OPTION_MPTCP_MPC_SYN | | |
cec37a6e | 393 | OPTION_MPTCP_MPC_SYNACK | |
eda7acdd PK |
394 | OPTION_MPTCP_MPC_ACK) & opts->suboptions) { |
395 | u8 len; | |
396 | ||
397 | if (OPTION_MPTCP_MPC_SYN & opts->suboptions) | |
398 | len = TCPOLEN_MPTCP_MPC_SYN; | |
cec37a6e PK |
399 | else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions) |
400 | len = TCPOLEN_MPTCP_MPC_SYNACK; | |
eda7acdd PK |
401 | else |
402 | len = TCPOLEN_MPTCP_MPC_ACK; | |
403 | ||
404 | *ptr++ = htonl((TCPOPT_MPTCP << 24) | (len << 16) | | |
405 | (MPTCPOPT_MP_CAPABLE << 12) | | |
406 | (MPTCP_SUPPORTED_VERSION << 8) | | |
65492c5a | 407 | MPTCP_CAP_HMAC_SHA256); |
eda7acdd PK |
408 | put_unaligned_be64(opts->sndr_key, ptr); |
409 | ptr += 2; | |
410 | if (OPTION_MPTCP_MPC_ACK & opts->suboptions) { | |
411 | put_unaligned_be64(opts->rcvr_key, ptr); | |
412 | ptr += 2; | |
413 | } | |
414 | } | |
6d0060f6 MM |
415 | |
416 | if (opts->ext_copy.use_ack || opts->ext_copy.use_map) { | |
417 | struct mptcp_ext *mpext = &opts->ext_copy; | |
418 | u8 len = TCPOLEN_MPTCP_DSS_BASE; | |
419 | u8 flags = 0; | |
420 | ||
421 | if (mpext->use_ack) { | |
422 | len += TCPOLEN_MPTCP_DSS_ACK64; | |
423 | flags = MPTCP_DSS_HAS_ACK | MPTCP_DSS_ACK64; | |
424 | } | |
425 | ||
426 | if (mpext->use_map) { | |
427 | len += TCPOLEN_MPTCP_DSS_MAP64; | |
428 | ||
429 | /* Use only 64-bit mapping flags for now, add | |
430 | * support for optional 32-bit mappings later. | |
431 | */ | |
432 | flags |= MPTCP_DSS_HAS_MAP | MPTCP_DSS_DSN64; | |
433 | if (mpext->data_fin) | |
434 | flags |= MPTCP_DSS_DATA_FIN; | |
435 | } | |
436 | ||
437 | *ptr++ = htonl((TCPOPT_MPTCP << 24) | | |
438 | (len << 16) | | |
439 | (MPTCPOPT_DSS << 12) | | |
440 | (flags)); | |
441 | ||
442 | if (mpext->use_ack) { | |
443 | put_unaligned_be64(mpext->data_ack, ptr); | |
444 | ptr += 2; | |
445 | } | |
446 | ||
447 | if (mpext->use_map) { | |
448 | put_unaligned_be64(mpext->data_seq, ptr); | |
449 | ptr += 2; | |
450 | put_unaligned_be32(mpext->subflow_seq, ptr); | |
451 | ptr += 1; | |
452 | put_unaligned_be32(mpext->data_len << 16 | | |
453 | TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); | |
454 | } | |
455 | } | |
eda7acdd | 456 | } |