]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blobdiff - drivers/block/nbd.c
virtio-blk: limit number of hw queues by nr_cpu_ids
[mirror_ubuntu-bionic-kernel.git] / drivers / block / nbd.c
index 5f2a4240a204d54fc6fe87e569dc6165d5190530..a95abaf867d0fb6af0433d216dc1bf8ff1809b14 100644 (file)
@@ -76,6 +76,7 @@ struct link_dead_args {
 #define NBD_HAS_CONFIG_REF             4
 #define NBD_BOUND                      5
 #define NBD_DESTROY_ON_DISCONNECT      6
+#define NBD_DISCONNECT_ON_CLOSE        7
 
 struct nbd_config {
        u32 flags;
@@ -111,12 +112,16 @@ struct nbd_device {
        struct task_struct *task_setup;
 };
 
+#define NBD_CMD_REQUEUED       1
+
 struct nbd_cmd {
        struct nbd_device *nbd;
+       struct mutex lock;
        int index;
        int cookie;
-       struct completion send_complete;
        blk_status_t status;
+       unsigned long flags;
+       u32 cmd_cookie;
 };
 
 #if IS_ENABLED(CONFIG_DEBUG_FS)
@@ -138,12 +143,42 @@ static void nbd_config_put(struct nbd_device *nbd);
 static void nbd_connect_reply(struct genl_info *info, int index);
 static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info);
 static void nbd_dead_link_work(struct work_struct *work);
+static void nbd_disconnect_and_put(struct nbd_device *nbd);
 
 static inline struct device *nbd_to_dev(struct nbd_device *nbd)
 {
        return disk_to_dev(nbd->disk);
 }
 
+static void nbd_requeue_cmd(struct nbd_cmd *cmd)
+{
+       struct request *req = blk_mq_rq_from_pdu(cmd);
+
+       if (!test_and_set_bit(NBD_CMD_REQUEUED, &cmd->flags))
+               blk_mq_requeue_request(req, true);
+}
+
+#define NBD_COOKIE_BITS 32
+
+static u64 nbd_cmd_handle(struct nbd_cmd *cmd)
+{
+       struct request *req = blk_mq_rq_from_pdu(cmd);
+       u32 tag = blk_mq_unique_tag(req);
+       u64 cookie = cmd->cmd_cookie;
+
+       return (cookie << NBD_COOKIE_BITS) | tag;
+}
+
+static u32 nbd_handle_to_tag(u64 handle)
+{
+       return (u32)handle;
+}
+
+static u32 nbd_handle_to_cookie(u64 handle)
+{
+       return (u32)(handle >> NBD_COOKIE_BITS);
+}
+
 static const char *nbdcmd_to_ascii(int cmd)
 {
        switch (cmd) {
@@ -173,9 +208,12 @@ static const struct device_attribute pid_attr = {
 static void nbd_dev_remove(struct nbd_device *nbd)
 {
        struct gendisk *disk = nbd->disk;
+       struct request_queue *q;
+
        if (disk) {
+               q = disk->queue;
                del_gendisk(disk);
-               blk_cleanup_queue(disk->queue);
+               blk_cleanup_queue(q);
                blk_mq_free_tag_set(&nbd->tag_set);
                disk->private_data = NULL;
                put_disk(disk);
@@ -231,9 +269,19 @@ static void nbd_size_clear(struct nbd_device *nbd)
 static void nbd_size_update(struct nbd_device *nbd)
 {
        struct nbd_config *config = nbd->config;
+       struct block_device *bdev = bdget_disk(nbd->disk, 0);
+
        blk_queue_logical_block_size(nbd->disk->queue, config->blksize);
        blk_queue_physical_block_size(nbd->disk->queue, config->blksize);
        set_capacity(nbd->disk, config->bytesize >> 9);
+       if (bdev) {
+               if (bdev->bd_disk) {
+                       bd_set_size(bdev, config->bytesize);
+                       set_blocksize(bdev, config->blksize);
+               } else
+                       bdev->bd_invalidated = 1;
+               bdput(bdev);
+       }
        kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
 }
 
@@ -243,6 +291,8 @@ static void nbd_size_set(struct nbd_device *nbd, loff_t blocksize,
        struct nbd_config *config = nbd->config;
        config->blksize = blocksize;
        config->bytesize = blocksize * nr_blocks;
+       if (nbd->task_recv != NULL)
+               nbd_size_update(nbd);
 }
 
 static void nbd_complete_rq(struct request *req)
@@ -290,6 +340,9 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
        }
        config = nbd->config;
 
+       if (!mutex_trylock(&cmd->lock))
+               return BLK_EH_RESET_TIMER;
+
        if (config->num_connections > 1) {
                dev_err_ratelimited(nbd_to_dev(nbd),
                                    "Connection timed out, retrying\n");
@@ -312,7 +365,8 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
                                        nbd_mark_nsock_dead(nbd, nsock, 1);
                                mutex_unlock(&nsock->tx_lock);
                        }
-                       blk_mq_requeue_request(req, true);
+                       mutex_unlock(&cmd->lock);
+                       nbd_requeue_cmd(cmd);
                        nbd_config_put(nbd);
                        return BLK_EH_NOT_HANDLED;
                }
@@ -322,6 +376,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
        }
        set_bit(NBD_TIMEDOUT, &config->runtime_flags);
        cmd->status = BLK_STS_IOERR;
+       mutex_unlock(&cmd->lock);
        sock_shutdown(nbd);
        nbd_config_put(nbd);
 
@@ -398,9 +453,9 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
        struct iov_iter from;
        unsigned long size = blk_rq_bytes(req);
        struct bio *bio;
+       u64 handle;
        u32 type;
        u32 nbd_cmd_flags = 0;
-       u32 tag = blk_mq_unique_tag(req);
        int sent = nsock->sent, skip = 0;
 
        iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request));
@@ -442,6 +497,8 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
                        goto send_pages;
                }
                iov_iter_advance(&from, sent);
+       } else {
+               cmd->cmd_cookie++;
        }
        cmd->index = index;
        cmd->cookie = nsock->cookie;
@@ -450,7 +507,8 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
                request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
                request.len = htonl(size);
        }
-       memcpy(request.handle, &tag, sizeof(tag));
+       handle = nbd_cmd_handle(cmd);
+       memcpy(request.handle, &handle, sizeof(handle));
 
        dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n",
                cmd, nbdcmd_to_ascii(type),
@@ -468,6 +526,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
                                nsock->pending = req;
                                nsock->sent = sent;
                        }
+                       set_bit(NBD_CMD_REQUEUED, &cmd->flags);
                        return BLK_STS_RESOURCE;
                }
                dev_err_ratelimited(disk_to_dev(nbd->disk),
@@ -509,6 +568,7 @@ send_pages:
                                         */
                                        nsock->pending = req;
                                        nsock->sent = sent;
+                                       set_bit(NBD_CMD_REQUEUED, &cmd->flags);
                                        return BLK_STS_RESOURCE;
                                }
                                dev_err(disk_to_dev(nbd->disk),
@@ -541,10 +601,12 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
        struct nbd_reply reply;
        struct nbd_cmd *cmd;
        struct request *req = NULL;
+       u64 handle;
        u16 hwq;
        u32 tag;
        struct kvec iov = {.iov_base = &reply, .iov_len = sizeof(reply)};
        struct iov_iter to;
+       int ret = 0;
 
        reply.magic = 0;
        iov_iter_kvec(&to, READ | ITER_KVEC, &iov, 1, sizeof(reply));
@@ -562,8 +624,8 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
                return ERR_PTR(-EPROTO);
        }
 
-       memcpy(&tag, reply.handle, sizeof(u32));
-
+       memcpy(&handle, reply.handle, sizeof(handle));
+       tag = nbd_handle_to_tag(handle);
        hwq = blk_mq_unique_tag_to_hwq(tag);
        if (hwq < nbd->tag_set.nr_hw_queues)
                req = blk_mq_tag_to_rq(nbd->tag_set.tags[hwq],
@@ -574,11 +636,25 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
                return ERR_PTR(-ENOENT);
        }
        cmd = blk_mq_rq_to_pdu(req);
+
+       mutex_lock(&cmd->lock);
+       if (cmd->cmd_cookie != nbd_handle_to_cookie(handle)) {
+               dev_err(disk_to_dev(nbd->disk), "Double reply on req %p, cmd_cookie %u, handle cookie %u\n",
+                       req, cmd->cmd_cookie, nbd_handle_to_cookie(handle));
+               ret = -ENOENT;
+               goto out;
+       }
+       if (test_bit(NBD_CMD_REQUEUED, &cmd->flags)) {
+               dev_err(disk_to_dev(nbd->disk), "Raced with timeout on req %p\n",
+                       req);
+               ret = -ENOENT;
+               goto out;
+       }
        if (ntohl(reply.error)) {
                dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n",
                        ntohl(reply.error));
                cmd->status = BLK_STS_IOERR;
-               return cmd;
+               goto out;
        }
 
        dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", cmd);
@@ -603,18 +679,18 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
                                if (nbd_disconnected(config) ||
                                    config->num_connections <= 1) {
                                        cmd->status = BLK_STS_IOERR;
-                                       return cmd;
+                                       goto out;
                                }
-                               return ERR_PTR(-EIO);
+                               ret = -EIO;
+                               goto out;
                        }
                        dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n",
                                cmd, bvec.bv_len);
                }
-       } else {
-               /* See the comment in nbd_queue_rq. */
-               wait_for_completion(&cmd->send_complete);
        }
-       return cmd;
+out:
+       mutex_unlock(&cmd->lock);
+       return ret ? ERR_PTR(ret) : cmd;
 }
 
 static void recv_work(struct work_struct *work)
@@ -777,7 +853,7 @@ again:
         */
        blk_mq_start_request(req);
        if (unlikely(nsock->pending && nsock->pending != req)) {
-               blk_mq_requeue_request(req, true);
+               nbd_requeue_cmd(cmd);
                ret = 0;
                goto out;
        }
@@ -790,7 +866,7 @@ again:
                dev_err_ratelimited(disk_to_dev(nbd->disk),
                                    "Request send failed, requeueing\n");
                nbd_mark_nsock_dead(nbd, nsock, 1);
-               blk_mq_requeue_request(req, true);
+               nbd_requeue_cmd(cmd);
                ret = 0;
        }
 out:
@@ -814,7 +890,8 @@ static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
         * that the server is misbehaving (or there was an error) before we're
         * done sending everything over the wire.
         */
-       init_completion(&cmd->send_complete);
+       mutex_lock(&cmd->lock);
+       clear_bit(NBD_CMD_REQUEUED, &cmd->flags);
 
        /* We can be called directly from the user space process, which means we
         * could possibly have signals pending so our sendmsg will fail.  In
@@ -826,7 +903,7 @@ static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
                ret = BLK_STS_IOERR;
        else if (!ret)
                ret = BLK_STS_OK;
-       complete(&cmd->send_complete);
+       mutex_unlock(&cmd->lock);
 
        return ret;
 }
@@ -950,10 +1027,6 @@ static void nbd_bdev_reset(struct block_device *bdev)
        if (bdev->bd_openers > 1)
                return;
        bd_set_size(bdev, 0);
-       if (max_part > 0) {
-               blkdev_reread_part(bdev);
-               bdev->bd_invalidated = 1;
-       }
 }
 
 static void nbd_parse_flags(struct nbd_device *nbd)
@@ -1109,7 +1182,6 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b
        if (ret)
                return ret;
 
-       bd_set_size(bdev, config->bytesize);
        if (max_part)
                bdev->bd_invalidated = 1;
        mutex_unlock(&nbd->config_lock);
@@ -1153,6 +1225,9 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
        case NBD_SET_SOCK:
                return nbd_add_socket(nbd, arg, false);
        case NBD_SET_BLKSIZE:
+               if (!arg || !is_power_of_2(arg) || arg < 512 ||
+                   arg > PAGE_SIZE)
+                       return -EINVAL;
                nbd_size_set(nbd, arg,
                             div_s64(config->bytesize, arg));
                return 0;
@@ -1269,6 +1344,9 @@ static int nbd_open(struct block_device *bdev, fmode_t mode)
                refcount_set(&nbd->config_refs, 1);
                refcount_inc(&nbd->refs);
                mutex_unlock(&nbd->config_lock);
+               bdev->bd_invalidated = 1;
+       } else if (nbd_disconnected(nbd->config)) {
+               bdev->bd_invalidated = 1;
        }
 out:
        mutex_unlock(&nbd_index_mutex);
@@ -1278,6 +1356,12 @@ out:
 static void nbd_release(struct gendisk *disk, fmode_t mode)
 {
        struct nbd_device *nbd = disk->private_data;
+       struct block_device *bdev = bdget_disk(disk, 0);
+
+       if (test_bit(NBD_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) &&
+                       bdev->bd_openers == 0)
+               nbd_disconnect_and_put(nbd);
+
        nbd_config_put(nbd);
        nbd_put(nbd);
 }
@@ -1425,6 +1509,8 @@ static int nbd_init_request(struct blk_mq_tag_set *set, struct request *rq,
 {
        struct nbd_cmd *cmd = blk_mq_rq_to_pdu(rq);
        cmd->nbd = set->driver_data;
+       cmd->flags = 0;
+       mutex_init(&cmd->lock);
        return 0;
 }
 
@@ -1591,7 +1677,7 @@ again:
                        if (new_index < 0) {
                                mutex_unlock(&nbd_index_mutex);
                                printk(KERN_ERR "nbd: failed to add new device\n");
-                               return ret;
+                               return new_index;
                        }
                        nbd = idr_find(&nbd_index_idr, new_index);
                }
@@ -1677,6 +1763,10 @@ again:
                                &config->runtime_flags);
                        put_dev = true;
                }
+               if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) {
+                       set_bit(NBD_DISCONNECT_ON_CLOSE,
+                               &config->runtime_flags);
+               }
        }
 
        if (info->attrs[NBD_ATTR_SOCKETS]) {
@@ -1721,6 +1811,16 @@ out:
        return ret;
 }
 
+static void nbd_disconnect_and_put(struct nbd_device *nbd)
+{
+       mutex_lock(&nbd->config_lock);
+       nbd_disconnect(nbd);
+       mutex_unlock(&nbd->config_lock);
+       if (test_and_clear_bit(NBD_HAS_CONFIG_REF,
+                              &nbd->config->runtime_flags))
+               nbd_config_put(nbd);
+}
+
 static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info)
 {
        struct nbd_device *nbd;
@@ -1753,12 +1853,7 @@ static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info)
                nbd_put(nbd);
                return 0;
        }
-       mutex_lock(&nbd->config_lock);
-       nbd_disconnect(nbd);
-       mutex_unlock(&nbd->config_lock);
-       if (test_and_clear_bit(NBD_HAS_CONFIG_REF,
-                              &nbd->config->runtime_flags))
-               nbd_config_put(nbd);
+       nbd_disconnect_and_put(nbd);
        nbd_config_put(nbd);
        nbd_put(nbd);
        return 0;
@@ -1769,7 +1864,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
        struct nbd_device *nbd = NULL;
        struct nbd_config *config;
        int index;
-       int ret = -EINVAL;
+       int ret = 0;
        bool put_dev = false;
 
        if (!netlink_capable(skb, CAP_SYS_ADMIN))
@@ -1809,6 +1904,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
            !nbd->task_recv) {
                dev_err(nbd_to_dev(nbd),
                        "not configured, cannot reconfigure\n");
+               ret = -EINVAL;
                goto out;
        }
 
@@ -1833,6 +1929,14 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
                                               &config->runtime_flags))
                                refcount_inc(&nbd->refs);
                }
+
+               if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) {
+                       set_bit(NBD_DISCONNECT_ON_CLOSE,
+                                       &config->runtime_flags);
+               } else {
+                       clear_bit(NBD_DISCONNECT_ON_CLOSE,
+                                       &config->runtime_flags);
+               }
        }
 
        if (info->attrs[NBD_ATTR_SOCKETS]) {