git.proxmox.com Git - mirror_ubuntu-kernels.git/commitdiff
Treewide: Stop corrupting socket's task_frag
author: Benjamin Coddington <bcodding@redhat.com>
Fri, 16 Dec 2022 12:45:27 +0000 (07:45 -0500)
committer: Jakub Kicinski <kuba@kernel.org>
Tue, 20 Dec 2022 01:28:49 +0000 (17:28 -0800)
Since moving to memalloc_nofs_save/restore, SUNRPC has stopped setting the
GFP_NOIO flag on sk_allocation, which the networking system uses to decide
when it is safe to use current->task_frag.  The result is unexpected
corruption of task_frag when SUNRPC is involved in memory reclaim.

The corruption can be seen in crashes, but the root cause is often
difficult to ascertain as a crashing machine's stack trace will have no
evidence of being near NFS or SUNRPC code.  I believe this problem to
be much more pervasive than reports to the community may indicate.

Fix this by having kernel users of sockets that may corrupt task_frag due
to reclaim set sk_use_task_frag = false.  Preemptively correcting this
situation for users that still set sk_allocation allows them to convert to
memalloc_nofs_save/restore without the same unexpected corruptions that are
sure to follow, unlikely to show up in testing, and difficult to bisect.

CC: Philipp Reisner <philipp.reisner@linbit.com>
CC: Lars Ellenberg <lars.ellenberg@linbit.com>
CC: "Christoph Böhmwalder" <christoph.boehmwalder@linbit.com>
CC: Jens Axboe <axboe@kernel.dk>
CC: Josef Bacik <josef@toxicpanda.com>
CC: Keith Busch <kbusch@kernel.org>
CC: Christoph Hellwig <hch@lst.de>
CC: Sagi Grimberg <sagi@grimberg.me>
CC: Lee Duncan <lduncan@suse.com>
CC: Chris Leech <cleech@redhat.com>
CC: Mike Christie <michael.christie@oracle.com>
CC: "James E.J. Bottomley" <jejb@linux.ibm.com>
CC: "Martin K. Petersen" <martin.petersen@oracle.com>
CC: Valentina Manea <valentina.manea.m@gmail.com>
CC: Shuah Khan <shuah@kernel.org>
CC: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
CC: David Howells <dhowells@redhat.com>
CC: Marc Dionne <marc.dionne@auristor.com>
CC: Steve French <sfrench@samba.org>
CC: Christine Caulfield <ccaulfie@redhat.com>
CC: David Teigland <teigland@redhat.com>
CC: Mark Fasheh <mark@fasheh.com>
CC: Joel Becker <jlbec@evilplan.org>
CC: Joseph Qi <joseph.qi@linux.alibaba.com>
CC: Eric Van Hensbergen <ericvh@gmail.com>
CC: Latchesar Ionkov <lucho@ionkov.net>
CC: Dominique Martinet <asmadeus@codewreck.org>
CC: Ilya Dryomov <idryomov@gmail.com>
CC: Xiubo Li <xiubli@redhat.com>
CC: Chuck Lever <chuck.lever@oracle.com>
CC: Jeff Layton <jlayton@kernel.org>
CC: Trond Myklebust <trond.myklebust@hammerspace.com>
CC: Anna Schumaker <anna@kernel.org>
CC: Steffen Klassert <steffen.klassert@secunet.com>
CC: Herbert Xu <herbert@gondor.apana.org.au>
Suggested-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
Reviewed-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
12 files changed:
drivers/block/drbd/drbd_receiver.c
drivers/block/nbd.c
drivers/nvme/host/tcp.c
drivers/scsi/iscsi_tcp.c
drivers/usb/usbip/usbip_common.c
fs/cifs/connect.c
fs/dlm/lowcomms.c
fs/ocfs2/cluster/tcp.c
net/9p/trans_fd.c
net/ceph/messenger.c
net/sunrpc/xprtsock.c
net/xfrm/espintcp.c

diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 0e58a3187345e73b6a386d1c2888f6c441f0ac3e..757f4692b5bd8034909b32334b27f5e952bb935e 100644
@@ -1030,6 +1030,9 @@ randomize:
        sock.socket->sk->sk_allocation = GFP_NOIO;
        msock.socket->sk->sk_allocation = GFP_NOIO;
 
+       sock.socket->sk->sk_use_task_frag = false;
+       msock.socket->sk->sk_use_task_frag = false;
+
        sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
        msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;
 
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index e379ccc63c520788c93c9adfab077fb41dda8701..592cfa8b765a578bd69a7c5cf33f49964d565e5f 100644
@@ -512,6 +512,7 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send,
        noreclaim_flag = memalloc_noreclaim_save();
        do {
                sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC;
+               sock->sk->sk_use_task_frag = false;
                msg.msg_name = NULL;
                msg.msg_namelen = 0;
                msg.msg_control = NULL;
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index b69b89166b6b96f131aa3c55b56ae7572475973e..8cedc1ef496c70937ce2d7fd264ff4e09e28d11d 100644
@@ -1537,6 +1537,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid)
        queue->sock->sk->sk_rcvtimeo = 10 * HZ;
 
        queue->sock->sk->sk_allocation = GFP_ATOMIC;
+       queue->sock->sk->sk_use_task_frag = false;
        nvme_tcp_set_queue_io_cpu(queue);
        queue->request = NULL;
        queue->data_remaining = 0;
diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index 5fb1f364e8155d7a3c857cbff10ad02ca9c89c33..1d1cf641937c14738596a68d6cb546c576bef741 100644
@@ -738,6 +738,7 @@ iscsi_sw_tcp_conn_bind(struct iscsi_cls_session *cls_session,
        sk->sk_reuse = SK_CAN_REUSE;
        sk->sk_sndtimeo = 15 * HZ; /* FIXME: make it configurable */
        sk->sk_allocation = GFP_ATOMIC;
+       sk->sk_use_task_frag = false;
        sk_set_memalloc(sk);
        sock_no_linger(sk);
 
diff --git a/drivers/usb/usbip/usbip_common.c b/drivers/usb/usbip/usbip_common.c
index f8b326eed54dcab356fa18025f8f34b4a11174a6..a2b2da1255dda0c1b282171ead393dc9ac0a9c68 100644
@@ -315,6 +315,7 @@ int usbip_recv(struct socket *sock, void *buf, int size)
 
        do {
                sock->sk->sk_allocation = GFP_NOIO;
+               sock->sk->sk_use_task_frag = false;
 
                result = sock_recvmsg(sock, &msg, MSG_WAITALL);
                if (result <= 0)
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index e80252a83225717b9d7bbb9b504d2042f91f8f32..7bc7b5e03c513345e7bb413094da405627ef8579 100644
@@ -2944,6 +2944,7 @@ generic_ip_connect(struct TCP_Server_Info *server)
                cifs_dbg(FYI, "Socket created\n");
                server->ssocket = socket;
                socket->sk->sk_allocation = GFP_NOFS;
+               socket->sk->sk_use_task_frag = false;
                if (sfamily == AF_INET6)
                        cifs_reclassify_socket6(socket);
                else
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 8b80ca0cd65fdc8d9b38a754ca58185edbbedebd..4450721ec83c6592f1d1eba34da8ea16bb561ff9 100644
@@ -645,6 +645,7 @@ static void add_sock(struct socket *sock, struct connection *con)
        if (dlm_config.ci_protocol == DLM_PROTO_SCTP)
                sk->sk_state_change = lowcomms_state_change;
        sk->sk_allocation = GFP_NOFS;
+       sk->sk_use_task_frag = false;
        sk->sk_error_report = lowcomms_error_report;
        release_sock(sk);
 }
@@ -1769,6 +1770,7 @@ static int dlm_listen_for_all(void)
        listen_con.sock = sock;
 
        sock->sk->sk_allocation = GFP_NOFS;
+       sock->sk->sk_use_task_frag = false;
        sock->sk->sk_data_ready = lowcomms_listen_data_ready;
        release_sock(sock->sk);
 
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 37d222bdfc8c3b0e446b8d84769250c2c34d3ee0..a07b24d170f235045359bf201c86f77133a934e9 100644
@@ -1602,6 +1602,7 @@ static void o2net_start_connect(struct work_struct *work)
        sc->sc_sock = sock; /* freed by sc_kref_release */
 
        sock->sk->sk_allocation = GFP_ATOMIC;
+       sock->sk->sk_use_task_frag = false;
 
        myaddr.sin_family = AF_INET;
        myaddr.sin_addr.s_addr = mynode->nd_ipv4_address;
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 07db2f436d44b03cb706a042df91825b37580f05..d9120f14684b65d01cbc343e4d7a24d568181912 100644
@@ -868,6 +868,7 @@ static int p9_socket_open(struct p9_client *client, struct socket *csocket)
        }
 
        csocket->sk->sk_allocation = GFP_NOIO;
+       csocket->sk->sk_use_task_frag = false;
        file = sock_alloc_file(csocket, 0, NULL);
        if (IS_ERR(file)) {
                pr_err("%s (%d): failed to map fd\n",
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index dfa237fbd5a325ab96bce3bfe05ceb78b11eceac..1d06e114ba3ff803587c3972cdd7c10e19d6a720 100644
@@ -446,6 +446,7 @@ int ceph_tcp_connect(struct ceph_connection *con)
        if (ret)
                return ret;
        sock->sk->sk_allocation = GFP_NOFS;
+       sock->sk->sk_use_task_frag = false;
 
 #ifdef CONFIG_LOCKDEP
        lockdep_set_class(&sock->sk->sk_lock, &socket_class);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index c0506d0d747802a78e65bd725b47b6e7e48f747a..aaa5b2741b79d1fc238eeb967c1bab9761f8ea6e 100644
@@ -1882,6 +1882,7 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt,
                sk->sk_write_space = xs_udp_write_space;
                sk->sk_state_change = xs_local_state_change;
                sk->sk_error_report = xs_error_report;
+               sk->sk_use_task_frag = false;
 
                xprt_clear_connected(xprt);
 
@@ -2082,6 +2083,7 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
                sk->sk_user_data = xprt;
                sk->sk_data_ready = xs_data_ready;
                sk->sk_write_space = xs_udp_write_space;
+               sk->sk_use_task_frag = false;
 
                xprt_set_connected(xprt);
 
@@ -2249,6 +2251,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
                sk->sk_state_change = xs_tcp_state_change;
                sk->sk_write_space = xs_tcp_write_space;
                sk->sk_error_report = xs_error_report;
+               sk->sk_use_task_frag = false;
 
                /* socket options */
                sock_reset_flag(sk, SOCK_LINGER);
diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c
index d6fece1ed982dff312469188222235fd5ae293ac..74a54295c164457f48a57198b605d2777e491562 100644
@@ -489,6 +489,7 @@ static int espintcp_init_sk(struct sock *sk)
 
        /* avoid using task_frag */
        sk->sk_allocation = GFP_ATOMIC;
+       sk->sk_use_task_frag = false;
 
        return 0;