]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blobdiff - net/mptcp/subflow.c
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
[mirror_ubuntu-jammy-kernel.git] / net / mptcp / subflow.c
index 9ead43f79023bc2cbf9aad4cfb1367a85b6519dc..ac4a1fe3550bd0ddee6e4f3d808aa4e48003dac6 100644 (file)
@@ -20,6 +20,7 @@
 #include <net/ip6_route.h>
 #endif
 #include <net/mptcp.h>
+#include <uapi/linux/mptcp.h>
 #include "protocol.h"
 #include "mib.h"
 
@@ -270,6 +271,19 @@ static bool subflow_thmac_valid(struct mptcp_subflow_context *subflow)
        return thmac == subflow->thmac;
 }
 
+void mptcp_subflow_reset(struct sock *ssk)
+{
+       struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+       struct sock *sk = subflow->conn;
+
+       tcp_set_state(ssk, TCP_CLOSE);
+       tcp_send_active_reset(ssk, GFP_ATOMIC);
+       tcp_done(ssk);
+       if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags) &&
+           schedule_work(&mptcp_sk(sk)->work))
+               sock_hold(sk);
+}
+
 static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
 {
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
@@ -342,8 +356,7 @@ fallback:
        return;
 
 do_reset:
-       tcp_send_active_reset(sk, GFP_ATOMIC);
-       tcp_done(sk);
+       mptcp_subflow_reset(sk);
 }
 
 struct request_sock_ops mptcp_subflow_request_sock_ops;
@@ -434,7 +447,7 @@ static void mptcp_sock_destruct(struct sock *sk)
                sock_orphan(sk);
        }
 
-       mptcp_token_destroy(mptcp_sk(sk));
+       mptcp_destroy_common(mptcp_sk(sk));
        inet_sock_destruct(sk);
 }
 
@@ -731,7 +744,8 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
 
        if (mpext->data_fin == 1) {
                if (data_len == 1) {
-                       mptcp_update_rcv_data_fin(msk, mpext->data_seq);
+                       bool updated = mptcp_update_rcv_data_fin(msk, mpext->data_seq,
+                                                                mpext->dsn64);
                        pr_debug("DATA_FIN with no payload seq=%llu", mpext->data_seq);
                        if (subflow->map_valid) {
                                /* A DATA_FIN might arrive in a DSS
@@ -742,11 +756,23 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
                                skb_ext_del(skb, SKB_EXT_MPTCP);
                                return MAPPING_OK;
                        } else {
+                               if (updated && schedule_work(&msk->work))
+                                       sock_hold((struct sock *)msk);
+
                                return MAPPING_DATA_FIN;
                        }
                } else {
-                       mptcp_update_rcv_data_fin(msk, mpext->data_seq + data_len);
-                       pr_debug("DATA_FIN with mapping seq=%llu", mpext->data_seq + data_len);
+                       u64 data_fin_seq = mpext->data_seq + data_len - 1;
+
+                       /* If mpext->data_seq is a 32-bit value, data_fin_seq
+                        * must also be limited to 32 bits.
+                        */
+                       if (!mpext->dsn64)
+                               data_fin_seq &= GENMASK_ULL(31, 0);
+
+                       mptcp_update_rcv_data_fin(msk, data_fin_seq, mpext->dsn64);
+                       pr_debug("DATA_FIN with mapping seq=%llu dsn64=%d",
+                                data_fin_seq, mpext->dsn64);
                }
 
                /* Adjust for DATA_FIN using 1 byte of sequence space */
@@ -756,12 +782,11 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
        if (!mpext->dsn64) {
                map_seq = expand_seq(subflow->map_seq, subflow->map_data_len,
                                     mpext->data_seq);
-               subflow->use_64bit_ack = 0;
                pr_debug("expanded seq=%llu", subflow->map_seq);
        } else {
                map_seq = mpext->data_seq;
-               subflow->use_64bit_ack = 1;
        }
+       WRITE_ONCE(mptcp_sk(subflow->conn)->use_64bit_ack, !!mpext->dsn64);
 
        if (subflow->map_valid) {
                /* Allow replacing only with an identical map */
@@ -804,16 +829,25 @@ validate_seq:
        return MAPPING_OK;
 }
 
-static int subflow_read_actor(read_descriptor_t *desc,
-                             struct sk_buff *skb,
-                             unsigned int offset, size_t len)
+static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb,
+                                      u64 limit)
 {
-       size_t copy_len = min(desc->count, len);
-
-       desc->count -= copy_len;
-
-       pr_debug("flushed %zu bytes, %zu left", copy_len, desc->count);
-       return copy_len;
+       struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+       bool fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
+       u32 incr;
+
+       incr = limit >= skb->len ? skb->len + fin : limit;
+
+       pr_debug("discarding=%d len=%d seq=%d", incr, skb->len,
+                subflow->map_subflow_seq);
+       MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DUPDATA);
+       tcp_sk(ssk)->copied_seq += incr;
+       if (!before(tcp_sk(ssk)->copied_seq, TCP_SKB_CB(skb)->end_seq))
+               sk_eat_skb(ssk, skb);
+       if (mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len)
+               subflow->map_valid = 0;
+       if (incr)
+               tcp_cleanup_rbuf(ssk, incr);
 }
 
 static bool subflow_check_data_avail(struct sock *ssk)
@@ -825,13 +859,13 @@ static bool subflow_check_data_avail(struct sock *ssk)
 
        pr_debug("msk=%p ssk=%p data_avail=%d skb=%p", subflow->conn, ssk,
                 subflow->data_avail, skb_peek(&ssk->sk_receive_queue));
+       if (!skb_peek(&ssk->sk_receive_queue))
+               subflow->data_avail = 0;
        if (subflow->data_avail)
                return true;
 
        msk = mptcp_sk(subflow->conn);
        for (;;) {
-               u32 map_remaining;
-               size_t delta;
                u64 ack_seq;
                u64 old_ack;
 
@@ -849,6 +883,7 @@ static bool subflow_check_data_avail(struct sock *ssk)
                        subflow->map_data_len = skb->len;
                        subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq -
                                                   subflow->ssn_offset;
+                       subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
                        return true;
                }
 
@@ -876,42 +911,18 @@ static bool subflow_check_data_avail(struct sock *ssk)
                ack_seq = mptcp_subflow_get_mapped_dsn(subflow);
                pr_debug("msk ack_seq=%llx subflow ack_seq=%llx", old_ack,
                         ack_seq);
-               if (ack_seq == old_ack)
+               if (ack_seq == old_ack) {
+                       subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
                        break;
+               } else if (after64(ack_seq, old_ack)) {
+                       subflow->data_avail = MPTCP_SUBFLOW_OOO_DATA;
+                       break;
+               }
 
                /* only accept in-sequence mapping. Old values are spurious
-                * retransmission; we can hit "future" values on active backup
-                * subflow switch, we relay on retransmissions to get
-                * in-sequence data.
-                * Cuncurrent subflows support will require subflow data
-                * reordering
+                * retransmission
                 */
-               map_remaining = subflow->map_data_len -
-                               mptcp_subflow_get_map_offset(subflow);
-               if (before64(ack_seq, old_ack))
-                       delta = min_t(size_t, old_ack - ack_seq, map_remaining);
-               else
-                       delta = min_t(size_t, ack_seq - old_ack, map_remaining);
-
-               /* discard mapped data */
-               pr_debug("discarding %zu bytes, current map len=%d", delta,
-                        map_remaining);
-               if (delta) {
-                       read_descriptor_t desc = {
-                               .count = delta,
-                       };
-                       int ret;
-
-                       ret = tcp_read_sock(ssk, &desc, subflow_read_actor);
-                       if (ret < 0) {
-                               ssk->sk_err = -ret;
-                               goto fatal;
-                       }
-                       if (ret < delta)
-                               return false;
-                       if (delta == map_remaining)
-                               subflow->map_valid = 0;
-               }
+               mptcp_subflow_discard_data(ssk, skb, old_ack - ack_seq);
        }
        return true;
 
@@ -922,13 +933,13 @@ fatal:
        ssk->sk_error_report(ssk);
        tcp_set_state(ssk, TCP_CLOSE);
        tcp_send_active_reset(ssk, GFP_ATOMIC);
+       subflow->data_avail = 0;
        return false;
 }
 
 bool mptcp_subflow_data_available(struct sock *sk)
 {
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
-       struct sk_buff *skb;
 
        /* check if current mapping is still valid */
        if (subflow->map_valid &&
@@ -941,15 +952,7 @@ bool mptcp_subflow_data_available(struct sock *sk)
                         subflow->map_data_len);
        }
 
-       if (!subflow_check_data_avail(sk)) {
-               subflow->data_avail = 0;
-               return false;
-       }
-
-       skb = skb_peek(&sk->sk_receive_queue);
-       subflow->data_avail = skb &&
-                      before(tcp_sk(sk)->copied_seq, TCP_SKB_CB(skb)->end_seq);
-       return subflow->data_avail;
+       return subflow_check_data_avail(sk);
 }
 
 /* If ssk has an mptcp parent socket, use the mptcp rcvbuf occupancy,
@@ -996,8 +999,10 @@ static void subflow_write_space(struct sock *sk)
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
        struct sock *parent = subflow->conn;
 
-       sk_stream_write_space(sk);
-       if (sk_stream_is_writeable(sk)) {
+       if (!sk_stream_is_writeable(sk))
+               return;
+
+       if (sk_stream_is_writeable(parent)) {
                set_bit(MPTCP_SEND_SPACE, &mptcp_sk(parent)->flags);
                smp_mb__after_atomic();
                /* set SEND_SPACE before sk_stream_write_space clears NOSPACE */
@@ -1056,8 +1061,7 @@ static void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
 #endif
 }
 
-int __mptcp_subflow_connect(struct sock *sk, int ifindex,
-                           const struct mptcp_addr_info *loc,
+int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
                            const struct mptcp_addr_info *remote)
 {
        struct mptcp_sock *msk = mptcp_sk(sk);
@@ -1102,7 +1106,7 @@ int __mptcp_subflow_connect(struct sock *sk, int ifindex,
        if (loc->family == AF_INET6)
                addrlen = sizeof(struct sockaddr_in6);
 #endif
-       ssk->sk_bound_dev_if = ifindex;
+       ssk->sk_bound_dev_if = loc->ifindex;
        err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen);
        if (err)
                goto failed;
@@ -1114,7 +1118,7 @@ int __mptcp_subflow_connect(struct sock *sk, int ifindex,
        subflow->local_id = local_id;
        subflow->remote_id = remote_id;
        subflow->request_join = 1;
-       subflow->request_bkup = 1;
+       subflow->request_bkup = !!(loc->flags & MPTCP_PM_ADDR_FLAG_BACKUP);
        mptcp_info2sockaddr(remote, &addr);
 
        err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK);