git.proxmox.com Git - mirror_ubuntu-hirsute-kernel.git/commitdiff
SUNRPC: Add basic load balancing to the transport switch
author Trond Myklebust <trond.myklebust@primarydata.com>
Fri, 28 Apr 2017 14:52:42 +0000 (10:52 -0400)
committer Trond Myklebust <trond.myklebust@hammerspace.com>
Sat, 6 Jul 2019 18:54:49 +0000 (14:54 -0400)
For now, just count the queue length. It is less accurate than counting
number of bytes queued, but easier to implement.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
include/linux/sunrpc/xprt.h
include/linux/sunrpc/xprtmultipath.h
net/sunrpc/clnt.c
net/sunrpc/xprtmultipath.c

index a6d9fce7f20ebd146d1160a665667ba1239d1f48..15322c1d9c8c164ef198218c21965ee9cae35e99 100644 (file)
@@ -238,6 +238,7 @@ struct rpc_xprt {
        /*
         * Send stuff
         */
+       atomic_long_t           queuelen;
        spinlock_t              transport_lock; /* lock transport info */
        spinlock_t              reserve_lock;   /* lock slot table */
        spinlock_t              queue_lock;     /* send/receive queue lock */
index af1257c030d2767913dc93d21650ca6b6345ebbb..c6cce3fbf29dbf6a5c023351cae5c628e96541ca 100644 (file)
@@ -15,6 +15,8 @@ struct rpc_xprt_switch {
        struct kref             xps_kref;
 
        unsigned int            xps_nxprts;
+       unsigned int            xps_nactive;
+       atomic_long_t           xps_queuelen;
        struct list_head        xps_xprt_list;
 
        struct net *            xps_net;
index b03bfa055c0827972cfe497a2dbf98df42f28445..976eab68bb5d5516735a6e9116960f2a8d9b3f6d 100644 (file)
@@ -968,13 +968,47 @@ out:
 }
 EXPORT_SYMBOL_GPL(rpc_bind_new_program);
 
+static struct rpc_xprt *
+rpc_task_get_xprt(struct rpc_clnt *clnt)
+{
+       struct rpc_xprt_switch *xps;
+       struct rpc_xprt *xprt= xprt_iter_get_next(&clnt->cl_xpi);
+
+       if (!xprt)
+               return NULL;
+       rcu_read_lock();
+       xps = rcu_dereference(clnt->cl_xpi.xpi_xpswitch);
+       atomic_long_inc(&xps->xps_queuelen);
+       rcu_read_unlock();
+       atomic_long_inc(&xprt->queuelen);
+
+       return xprt;
+}
+
+static void
+rpc_task_release_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt)
+{
+       struct rpc_xprt_switch *xps;
+
+       atomic_long_dec(&xprt->queuelen);
+       rcu_read_lock();
+       xps = rcu_dereference(clnt->cl_xpi.xpi_xpswitch);
+       atomic_long_dec(&xps->xps_queuelen);
+       rcu_read_unlock();
+
+       xprt_put(xprt);
+}
+
 void rpc_task_release_transport(struct rpc_task *task)
 {
        struct rpc_xprt *xprt = task->tk_xprt;
 
        if (xprt) {
                task->tk_xprt = NULL;
-               xprt_put(xprt);
+               if (task->tk_client)
+                       rpc_task_release_xprt(task->tk_client, xprt);
+               else
+                       xprt_put(xprt);
        }
 }
 EXPORT_SYMBOL_GPL(rpc_task_release_transport);
@@ -983,6 +1017,7 @@ void rpc_task_release_client(struct rpc_task *task)
 {
        struct rpc_clnt *clnt = task->tk_client;
 
+       rpc_task_release_transport(task);
        if (clnt != NULL) {
                /* Remove from client task list */
                spin_lock(&clnt->cl_lock);
@@ -992,14 +1027,13 @@ void rpc_task_release_client(struct rpc_task *task)
 
                rpc_release_client(clnt);
        }
-       rpc_task_release_transport(task);
 }
 
 static
 void rpc_task_set_transport(struct rpc_task *task, struct rpc_clnt *clnt)
 {
        if (!task->tk_xprt)
-               task->tk_xprt = xprt_iter_get_next(&clnt->cl_xpi);
+               task->tk_xprt = rpc_task_get_xprt(clnt);
 }
 
 static
index 8394124126f8f2cabb73387f5a8c871dab5bbf20..394e427533beda31ed1bb3d80820b59fe08c022b 100644 (file)
@@ -36,6 +36,7 @@ static void xprt_switch_add_xprt_locked(struct rpc_xprt_switch *xps,
        if (xps->xps_nxprts == 0)
                xps->xps_net = xprt->xprt_net;
        xps->xps_nxprts++;
+       xps->xps_nactive++;
 }
 
 /**
@@ -62,6 +63,7 @@ static void xprt_switch_remove_xprt_locked(struct rpc_xprt_switch *xps,
 {
        if (unlikely(xprt == NULL))
                return;
+       xps->xps_nactive--;
        xps->xps_nxprts--;
        if (xps->xps_nxprts == 0)
                xps->xps_net = NULL;
@@ -317,8 +319,24 @@ struct rpc_xprt *xprt_switch_find_next_entry_roundrobin(struct list_head *head,
 static
 struct rpc_xprt *xprt_iter_next_entry_roundrobin(struct rpc_xprt_iter *xpi)
 {
-       return xprt_iter_next_entry_multiple(xpi,
+       struct rpc_xprt_switch *xps = rcu_dereference(xpi->xpi_xpswitch);
+       struct rpc_xprt *xprt;
+       unsigned long xprt_queuelen;
+       unsigned long xps_queuelen;
+       unsigned long xps_avglen;
+
+       do {
+               xprt = xprt_iter_next_entry_multiple(xpi,
                        xprt_switch_find_next_entry_roundrobin);
+               if (xprt == NULL)
+                       break;
+               xprt_queuelen = atomic_long_read(&xprt->queuelen);
+               if (xprt_queuelen <= 2)
+                       break;
+               xps_queuelen = atomic_long_read(&xps->xps_queuelen);
+               xps_avglen = DIV_ROUND_UP(xps_queuelen, xps->xps_nactive);
+       } while (xprt_queuelen > xps_avglen);
+       return xprt;
 }
 
 static