]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blame - drivers/block/nbd.c
nbd: convert to blkmq
[mirror_ubuntu-bionic-kernel.git] / drivers / block / nbd.c
CommitLineData
1da177e4
LT
1/*
2 * Network block device - make block devices work over TCP
3 *
4 * Note that you can not swap over this thing, yet. Seems to work but
5 * deadlocks sometimes - you can not swap over TCP in general.
6 *
a2531293 7 * Copyright 1997-2000, 2008 Pavel Machek <pavel@ucw.cz>
1da177e4
LT
8 * Parts copyright 2001 Steven Whitehouse <steve@chygwyn.com>
9 *
dbf492d6 10 * This file is released under GPLv2 or later.
1da177e4 11 *
dbf492d6 12 * (part of code stolen from loop.c)
1da177e4
LT
13 */
14
15#include <linux/major.h>
16
17#include <linux/blkdev.h>
18#include <linux/module.h>
19#include <linux/init.h>
20#include <linux/sched.h>
21#include <linux/fs.h>
22#include <linux/bio.h>
23#include <linux/stat.h>
24#include <linux/errno.h>
25#include <linux/file.h>
26#include <linux/ioctl.h>
2a48fc0a 27#include <linux/mutex.h>
4b2f0260
HX
28#include <linux/compiler.h>
29#include <linux/err.h>
30#include <linux/kernel.h>
5a0e3ad6 31#include <linux/slab.h>
1da177e4 32#include <net/sock.h>
91cf45f0 33#include <linux/net.h>
48cf6061 34#include <linux/kthread.h>
b9c495bb 35#include <linux/types.h>
30d53d9c 36#include <linux/debugfs.h>
fd8383fd 37#include <linux/blk-mq.h>
1da177e4 38
1da177e4
LT
39#include <asm/uaccess.h>
40#include <asm/types.h>
41
42#include <linux/nbd.h>
43
/*
 * Per-device state for one /dev/nbdX. One instance per minor, allocated
 * in the global nbd_dev[] array at module init.
 */
struct nbd_device {
	u32 flags;			/* NBD_FLAG_* negotiated via NBD_SET_FLAGS */
	struct socket * sock;	/* If == NULL, device is not ready, yet */
	int magic;			/* NBD_MAGIC while the device is live */

	atomic_t outstanding_cmds;	/* requests sent but not yet completed */
	struct blk_mq_tag_set tag_set;	/* blk-mq tag set (1 hw queue, depth 128) */

	struct mutex tx_lock;		/* serializes senders on the socket */
	struct gendisk *disk;
	int blksize;			/* current block size in bytes */
	loff_t bytesize;		/* exported device size in bytes */
	int xmit_timeout;		/* I/O timeout in jiffies, 0 = none */
	bool timedout;			/* set by nbd_xmit_timeout() */
	bool disconnect; /* a disconnect has been requested by user */

	struct timer_list timeout_timer;
	/* protects initialization and shutdown of the socket */
	spinlock_t sock_lock;
	struct task_struct *task_recv;	/* task running NBD_DO_IT, NULL if idle */
	struct task_struct *task_send;	/* task currently inside nbd_handle_cmd() */

#if IS_ENABLED(CONFIG_DEBUG_FS)
	struct dentry *dbg_dir;		/* per-device debugfs directory */
#endif
};
70
/* Per-request driver data, embedded in each blk-mq request PDU. */
struct nbd_cmd {
	struct nbd_device *nbd;		/* owning device, set in nbd_init_request() */
	struct list_head list;		/* unused; kept for compatibility */
};
75
#if IS_ENABLED(CONFIG_DEBUG_FS)
/* Root "nbd" debugfs directory; per-device subdirs are created below it. */
static struct dentry *nbd_dbg_dir;
#endif

/* Shorthand for a device's disk name ("nbd0", "nbd1", ...). */
#define nbd_name(nbd) ((nbd)->disk->disk_name)

/* Sanity value kept in nbd_device->magic to detect stale/bogus devices. */
#define NBD_MAGIC 0x68797548

static unsigned int nbds_max = 16;	/* module param: number of devices */
static struct nbd_device *nbd_dev;	/* array of nbds_max devices */
static int max_part;			/* module param: partitions per device */
1da177e4 87
/* Return the struct device backing this nbd's gendisk (for dev_* logging). */
static inline struct device *nbd_to_dev(struct nbd_device *nbd)
{
	return disk_to_dev(nbd->disk);
}
92
37091fdd
MP
93static bool nbd_is_connected(struct nbd_device *nbd)
94{
95 return !!nbd->task_recv;
96}
97
1da177e4
LT
98static const char *nbdcmd_to_ascii(int cmd)
99{
100 switch (cmd) {
101 case NBD_CMD_READ: return "read";
102 case NBD_CMD_WRITE: return "write";
103 case NBD_CMD_DISC: return "disconnect";
75f187ab 104 case NBD_CMD_FLUSH: return "flush";
a336d298 105 case NBD_CMD_TRIM: return "trim/discard";
1da177e4
LT
106 }
107 return "invalid";
108}
1da177e4 109
/*
 * Reset the exported size to zero (on disconnect/teardown) and notify
 * userspace of the geometry change via a KOBJ_CHANGE uevent.
 */
static int nbd_size_clear(struct nbd_device *nbd, struct block_device *bdev)
{
	bdev->bd_inode->i_size = 0;
	set_capacity(nbd->disk, 0);
	kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);

	return 0;
}
118
/*
 * Propagate nbd->bytesize to the bdev inode and gendisk capacity.
 * No-op unless a receiver is attached (nbd_is_connected()).
 */
static void nbd_size_update(struct nbd_device *nbd, struct block_device *bdev)
{
	if (!nbd_is_connected(nbd))
		return;

	bdev->bd_inode->i_size = nbd->bytesize;
	/* capacity is in 512-byte sectors */
	set_capacity(nbd->disk, nbd->bytesize >> 9);
	kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
}
128
/*
 * Set block size and size-in-blocks for the device, then publish the new
 * geometry. Returns 0 or the error from set_blocksize().
 */
static int nbd_size_set(struct nbd_device *nbd, struct block_device *bdev,
			int blocksize, int nr_blocks)
{
	int ret;

	ret = set_blocksize(bdev, blocksize);
	if (ret)
		return ret;

	nbd->blksize = blocksize;
	/* widen before multiplying so large devices don't overflow int */
	nbd->bytesize = (loff_t)blocksize * (loff_t)nr_blocks;

	nbd_size_update(nbd, bdev);

	return 0;
}
145
/*
 * Complete one request back to blk-mq, translating the accumulated
 * req->errors count into -EIO, and drop it from the outstanding count.
 */
static void nbd_end_request(struct nbd_cmd *cmd)
{
	struct nbd_device *nbd = cmd->nbd;
	struct request *req = blk_mq_rq_from_pdu(cmd);
	int error = req->errors ? -EIO : 0;

	dev_dbg(nbd_to_dev(nbd), "request %p: %s\n", cmd,
		error ? "failed" : "done");

	atomic_dec(&nbd->outstanding_cmds);
	blk_mq_complete_request(req, error);
}
158
/*
 * Forcibly shutdown the socket causing all listeners to error
 *
 * Takes sock_lock to serialize against nbd_set_socket()/nbd_xmit_timeout(),
 * drops the socket reference and stops the I/O timeout timer.
 */
static void sock_shutdown(struct nbd_device *nbd)
{
	spin_lock_irq(&nbd->sock_lock);

	if (!nbd->sock) {
		spin_unlock_irq(&nbd->sock_lock);
		return;
	}

	dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n");
	kernel_sock_shutdown(nbd->sock, SHUT_RDWR);
	sockfd_put(nbd->sock);
	nbd->sock = NULL;
	spin_unlock_irq(&nbd->sock_lock);

	del_timer(&nbd->timeout_timer);
}
179
/*
 * Timer callback fired when outstanding I/O has not completed within
 * xmit_timeout. Marks the device timed out and shuts the socket down so
 * the receiver thread errors out; runs in timer (softirq) context, hence
 * the irqsave locking.
 */
static void nbd_xmit_timeout(unsigned long arg)
{
	struct nbd_device *nbd = (struct nbd_device *)arg;
	unsigned long flags;

	/* nothing in flight: spurious timer, ignore */
	if (!atomic_read(&nbd->outstanding_cmds))
		return;

	spin_lock_irqsave(&nbd->sock_lock, flags);

	nbd->timedout = true;

	if (nbd->sock)
		kernel_sock_shutdown(nbd->sock, SHUT_RDWR);

	spin_unlock_irqrestore(&nbd->sock_lock, flags);

	dev_err(nbd_to_dev(nbd), "Connection timed out, shutting down connection\n");
}
199
/*
 * Send or receive packet.
 *
 * Transfers exactly @size bytes (looping over partial transfers) in the
 * direction given by @send. Runs under PF_MEMALLOC so that writeback over
 * nbd cannot deadlock on memory reclaim. Returns the last kernel_sendmsg/
 * kernel_recvmsg result: positive on success, negative errno on failure
 * (-EPIPE for a short read).
 */
static int sock_xmit(struct nbd_device *nbd, int send, void *buf, int size,
		int msg_flags)
{
	struct socket *sock = nbd->sock;
	int result;
	struct msghdr msg;
	struct kvec iov;
	unsigned long pflags = current->flags;

	if (unlikely(!sock)) {
		dev_err(disk_to_dev(nbd->disk),
			"Attempted %s on closed socket in sock_xmit\n",
			(send ? "send" : "recv"));
		return -EINVAL;
	}

	current->flags |= PF_MEMALLOC;
	do {
		sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC;
		iov.iov_base = buf;
		iov.iov_len = size;
		msg.msg_name = NULL;
		msg.msg_namelen = 0;
		msg.msg_control = NULL;
		msg.msg_controllen = 0;
		msg.msg_flags = msg_flags | MSG_NOSIGNAL;

		if (send)
			result = kernel_sendmsg(sock, &msg, &iov, 1, size);
		else
			result = kernel_recvmsg(sock, &msg, &iov, 1, size,
						msg.msg_flags);

		if (result <= 0) {
			if (result == 0)
				result = -EPIPE; /* short read */
			break;
		}
		size -= result;
		buf += result;
	} while (size > 0);

	tsk_restore_flags(current, pflags, PF_MEMALLOC);

	/* a reply arrived: push the I/O timeout out again */
	if (!send && nbd->xmit_timeout)
		mod_timer(&nbd->timeout_timer, jiffies + nbd->xmit_timeout);

	return result;
}
252
/* Send one bio_vec's worth of data, kmapping its page around the xmit. */
static inline int sock_send_bvec(struct nbd_device *nbd, struct bio_vec *bvec,
		int flags)
{
	int result;
	void *kaddr = kmap(bvec->bv_page);
	result = sock_xmit(nbd, 1, kaddr + bvec->bv_offset,
			   bvec->bv_len, flags);
	kunmap(bvec->bv_page);
	return result;
}
263
/* always call with the tx_lock held */
/*
 * Transmit one request to the server: map the blk-mq request to an NBD
 * opcode, send the fixed-size nbd_request header (the blk-mq tag is used
 * as the wire handle so replies can be matched in nbd_read_stat()), then
 * stream the payload for writes. Returns 0 or -EIO on any send failure.
 */
static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd)
{
	struct request *req = blk_mq_rq_from_pdu(cmd);
	int result, flags;
	struct nbd_request request;
	unsigned long size = blk_rq_bytes(req);
	u32 type;

	if (req->cmd_type == REQ_TYPE_DRV_PRIV)
		type = NBD_CMD_DISC;	/* driver-private == disconnect request */
	else if (req_op(req) == REQ_OP_DISCARD)
		type = NBD_CMD_TRIM;
	else if (req_op(req) == REQ_OP_FLUSH)
		type = NBD_CMD_FLUSH;
	else if (rq_data_dir(req) == WRITE)
		type = NBD_CMD_WRITE;
	else
		type = NBD_CMD_READ;

	memset(&request, 0, sizeof(request));
	request.magic = htonl(NBD_REQUEST_MAGIC);
	request.type = htonl(type);
	/* FLUSH and DISC carry no offset/length on the wire */
	if (type != NBD_CMD_FLUSH && type != NBD_CMD_DISC) {
		request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
		request.len = htonl(size);
	}
	memcpy(request.handle, &req->tag, sizeof(req->tag));

	dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n",
		cmd, nbdcmd_to_ascii(type),
		(unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req));
	result = sock_xmit(nbd, 1, &request, sizeof(request),
			(type == NBD_CMD_WRITE) ? MSG_MORE : 0);
	if (result <= 0) {
		dev_err(disk_to_dev(nbd->disk),
			"Send control failed (result %d)\n", result);
		return -EIO;
	}

	if (type == NBD_CMD_WRITE) {
		struct req_iterator iter;
		struct bio_vec bvec;
		/*
		 * we are really probing at internals to determine
		 * whether to set MSG_MORE or not...
		 */
		rq_for_each_segment(bvec, req, iter) {
			flags = 0;
			if (!rq_iter_last(bvec, iter))
				flags = MSG_MORE;
			dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n",
				cmd, bvec.bv_len);
			result = sock_send_bvec(nbd, &bvec, flags);
			if (result <= 0) {
				dev_err(disk_to_dev(nbd->disk),
					"Send data failed (result %d)\n",
					result);
				return -EIO;
			}
		}
	}
	return 0;
}
328
/* Receive one bio_vec's worth of data, kmapping its page around the recv. */
static inline int sock_recv_bvec(struct nbd_device *nbd, struct bio_vec *bvec)
{
	int result;
	void *kaddr = kmap(bvec->bv_page);
	result = sock_xmit(nbd, 0, kaddr + bvec->bv_offset, bvec->bv_len,
			MSG_WAITALL);
	kunmap(bvec->bv_page);
	return result;
}
338
/* NULL returned = something went wrong, inform userspace */
/*
 * Read one reply from the server: validate the reply magic, look the
 * request up by the tag echoed back in the wire handle, then (for reads)
 * receive the payload into the request's segments. Returns the matching
 * nbd_cmd (possibly with req->errors bumped) or an ERR_PTR on protocol or
 * socket failure.
 */
static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd)
{
	int result;
	struct nbd_reply reply;
	struct nbd_cmd *cmd;
	struct request *req = NULL;
	u16 hwq;
	int tag;

	reply.magic = 0;
	result = sock_xmit(nbd, 0, &reply, sizeof(reply), MSG_WAITALL);
	if (result <= 0) {
		dev_err(disk_to_dev(nbd->disk),
			"Receive control failed (result %d)\n", result);
		return ERR_PTR(result);
	}

	if (ntohl(reply.magic) != NBD_REPLY_MAGIC) {
		dev_err(disk_to_dev(nbd->disk), "Wrong magic (0x%lx)\n",
				(unsigned long)ntohl(reply.magic));
		return ERR_PTR(-EPROTO);
	}

	/* the wire handle carries the blk-mq tag set by nbd_send_cmd() */
	memcpy(&tag, reply.handle, sizeof(int));

	hwq = blk_mq_unique_tag_to_hwq(tag);
	if (hwq < nbd->tag_set.nr_hw_queues)
		req = blk_mq_tag_to_rq(nbd->tag_set.tags[hwq],
				       blk_mq_unique_tag_to_tag(tag));
	if (!req || !blk_mq_request_started(req)) {
		dev_err(disk_to_dev(nbd->disk), "Unexpected reply (%d) %p\n",
			tag, req);
		return ERR_PTR(-ENOENT);
	}
	cmd = blk_mq_rq_to_pdu(req);

	if (ntohl(reply.error)) {
		dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n",
			ntohl(reply.error));
		req->errors++;
		return cmd;
	}

	dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", cmd);
	if (rq_data_dir(req) != WRITE) {
		struct req_iterator iter;
		struct bio_vec bvec;

		rq_for_each_segment(bvec, req, iter) {
			result = sock_recv_bvec(nbd, &bvec);
			if (result <= 0) {
				dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
					result);
				req->errors++;
				return cmd;
			}
			dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n",
				cmd, bvec.bv_len);
		}
	}
	return cmd;
}
402
/* sysfs "pid" attribute: pid of the task running NBD_DO_IT for this disk. */
static ssize_t pid_show(struct device *dev,
			struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);
	struct nbd_device *nbd = (struct nbd_device *)disk->private_data;

	return sprintf(buf, "%d\n", task_pid_nr(nbd->task_recv));
}
411
/* Read-only sysfs attribute; created while a receiver thread is attached. */
static struct device_attribute pid_attr = {
	.attr = { .name = "pid", .mode = S_IRUGO},
	.show = pid_show,
};
416
/*
 * Receive loop run from the NBD_DO_IT ioctl: publish the pid attribute and
 * device size, then process replies until the socket errors out. Returns
 * the terminating error from nbd_read_stat() (0 never happens; shutdown is
 * signalled by a socket error).
 */
static int nbd_thread_recv(struct nbd_device *nbd, struct block_device *bdev)
{
	struct nbd_cmd *cmd;
	int ret;

	BUG_ON(nbd->magic != NBD_MAGIC);

	/* allow reclaim-driven writeback through this socket */
	sk_set_memalloc(nbd->sock->sk);

	ret = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
	if (ret) {
		dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
		return ret;
	}

	nbd_size_update(nbd, bdev);

	while (1) {
		cmd = nbd_read_stat(nbd);
		if (IS_ERR(cmd)) {
			ret = PTR_ERR(cmd);
			break;
		}

		nbd_end_request(cmd);
	}

	nbd_size_clear(nbd, bdev);

	device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
	return ret;
}
449
/*
 * blk_mq_tagset_busy_iter() callback: fail one in-flight request during
 * queue teardown. @data and @reserved are unused.
 */
static void nbd_clear_req(struct request *req, void *data, bool reserved)
{
	struct nbd_cmd *cmd;

	if (!blk_mq_request_started(req))
		return;
	cmd = blk_mq_rq_to_pdu(req);
	req->errors++;		/* completes as -EIO in nbd_end_request() */
	nbd_end_request(cmd);
}
460
/* Fail every in-flight request; called only after the socket is torn down. */
static void nbd_clear_que(struct nbd_device *nbd)
{
	BUG_ON(nbd->magic != NBD_MAGIC);

	/*
	 * Because we have set nbd->sock to NULL under the tx_lock, all
	 * modifications to the list must have completed by now.
	 */
	BUG_ON(nbd->sock);

	blk_mq_tagset_busy_iter(&nbd->tag_set, nbd_clear_req, NULL);
	dev_dbg(disk_to_dev(nbd->disk), "queue cleared\n");
}
474
7fdfd406 475
/*
 * Submit one blk-mq request to the server. Rejects non-FS requests and
 * writes to read-only exports, then sends under tx_lock, (re)arming the
 * I/O timeout when this is the first outstanding command. Failed requests
 * are completed with an error here rather than propagated.
 */
static void nbd_handle_cmd(struct nbd_cmd *cmd)
{
	struct request *req = blk_mq_rq_from_pdu(cmd);
	struct nbd_device *nbd = cmd->nbd;

	if (req->cmd_type != REQ_TYPE_FS)
		goto error_out;

	if (rq_data_dir(req) == WRITE &&
	    (nbd->flags & NBD_FLAG_READ_ONLY)) {
		dev_err(disk_to_dev(nbd->disk),
			"Write on read-only\n");
		goto error_out;
	}

	req->errors = 0;

	mutex_lock(&nbd->tx_lock);
	nbd->task_send = current;
	if (unlikely(!nbd->sock)) {
		mutex_unlock(&nbd->tx_lock);
		dev_err(disk_to_dev(nbd->disk),
			"Attempted send on closed socket\n");
		goto error_out;
	}

	/* first command in flight: start the I/O watchdog */
	if (nbd->xmit_timeout && !atomic_read(&nbd->outstanding_cmds))
		mod_timer(&nbd->timeout_timer, jiffies + nbd->xmit_timeout);

	atomic_inc(&nbd->outstanding_cmds);
	if (nbd_send_cmd(nbd, cmd) != 0) {
		dev_err(disk_to_dev(nbd->disk), "Request send failed\n");
		req->errors++;
		nbd_end_request(cmd);
	}

	nbd->task_send = NULL;
	mutex_unlock(&nbd->tx_lock);

	return;

error_out:
	req->errors++;
	nbd_end_request(cmd);
}
521
/* blk-mq ->queue_rq: mark the request started and hand it to the sender. */
static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
			const struct blk_mq_queue_data *bd)
{
	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);

	blk_mq_start_request(bd->rq);
	nbd_handle_cmd(cmd);
	return BLK_MQ_RQ_QUEUE_OK;
}
531
23272a67
MP
532static int nbd_set_socket(struct nbd_device *nbd, struct socket *sock)
533{
534 int ret = 0;
535
536 spin_lock_irq(&nbd->sock_lock);
537
538 if (nbd->sock) {
539 ret = -EBUSY;
540 goto out;
541 }
542
543 nbd->sock = sock;
544
545out:
546 spin_unlock_irq(&nbd->sock_lock);
547
548 return ret;
549}
550
/* Reset all properties of an NBD device */
static void nbd_reset(struct nbd_device *nbd)
{
	nbd->disconnect = false;
	nbd->timedout = false;
	nbd->blksize = 1024;		/* historical default block size */
	nbd->bytesize = 0;
	set_capacity(nbd->disk, 0);
	nbd->flags = 0;
	nbd->xmit_timeout = 0;
	queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
	del_timer_sync(&nbd->timeout_timer);
}
564
/* Undo per-bdev state set while connected: clear RO, size, partitions. */
static void nbd_bdev_reset(struct block_device *bdev)
{
	set_device_ro(bdev, false);
	bdev->bd_inode->i_size = 0;
	if (max_part > 0) {
		blkdev_reread_part(bdev);
		bdev->bd_invalidated = 1;
	}
}
574
d02cf531
MP
575static void nbd_parse_flags(struct nbd_device *nbd, struct block_device *bdev)
576{
577 if (nbd->flags & NBD_FLAG_READ_ONLY)
578 set_device_ro(bdev, true);
579 if (nbd->flags & NBD_FLAG_SEND_TRIM)
580 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
581 if (nbd->flags & NBD_FLAG_SEND_FLUSH)
aafb1eec 582 blk_queue_write_cache(nbd->disk->queue, true, false);
d02cf531 583 else
aafb1eec 584 blk_queue_write_cache(nbd->disk->queue, false, false);
d02cf531
MP
585}
586
/* Forward declarations; real or stub bodies depend on CONFIG_DEBUG_FS. */
static int nbd_dev_dbg_init(struct nbd_device *nbd);
static void nbd_dev_dbg_close(struct nbd_device *nbd);
1a2ad211 590/* Must be called with tx_lock held */
1da177e4 591
f4507164 592static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
1a2ad211
PM
593 unsigned int cmd, unsigned long arg)
594{
1da177e4 595 switch (cmd) {
1a2ad211 596 case NBD_DISCONNECT: {
fd8383fd 597 struct request *sreq;
1a2ad211 598
f4507164 599 dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
3a2d63f8
PB
600 if (!nbd->sock)
601 return -EINVAL;
1a2ad211 602
fd8383fd
JB
603 sreq = blk_mq_alloc_request(bdev_get_queue(bdev), WRITE, 0);
604 if (!sreq)
605 return -ENOMEM;
606
3a2d63f8
PB
607 mutex_unlock(&nbd->tx_lock);
608 fsync_bdev(bdev);
609 mutex_lock(&nbd->tx_lock);
fd8383fd 610 sreq->cmd_type = REQ_TYPE_DRV_PRIV;
3a2d63f8
PB
611
612 /* Check again after getting mutex back. */
fd8383fd
JB
613 if (!nbd->sock) {
614 blk_mq_free_request(sreq);
1da177e4 615 return -EINVAL;
fd8383fd 616 }
3a2d63f8 617
696697cb 618 nbd->disconnect = true;
c378f70a 619
fd8383fd
JB
620 nbd_send_cmd(nbd, blk_mq_rq_to_pdu(sreq));
621 blk_mq_free_request(sreq);
c378f70a 622 return 0;
1a2ad211 623 }
1da177e4 624
23272a67
MP
625 case NBD_CLEAR_SOCK:
626 sock_shutdown(nbd);
f4507164 627 nbd_clear_que(nbd);
3a2d63f8 628 kill_bdev(bdev);
1a2ad211 629 return 0;
1a2ad211
PM
630
631 case NBD_SET_SOCK: {
e2511578 632 int err;
23272a67
MP
633 struct socket *sock = sockfd_lookup(arg, &err);
634
635 if (!sock)
636 return err;
637
638 err = nbd_set_socket(nbd, sock);
639 if (!err && max_part)
640 bdev->bd_invalidated = 1;
641
642 return err;
1a2ad211
PM
643 }
644
37091fdd 645 case NBD_SET_BLKSIZE: {
5e454c67 646 loff_t bsize = div_s64(nbd->bytesize, arg);
37091fdd
MP
647
648 return nbd_size_set(nbd, bdev, arg, bsize);
649 }
1a2ad211 650
1da177e4 651 case NBD_SET_SIZE:
37091fdd
MP
652 return nbd_size_set(nbd, bdev, nbd->blksize,
653 arg / nbd->blksize);
654
655 case NBD_SET_SIZE_BLOCKS:
656 return nbd_size_set(nbd, bdev, nbd->blksize, arg);
1a2ad211 657
7fdfd406 658 case NBD_SET_TIMEOUT:
f4507164 659 nbd->xmit_timeout = arg * HZ;
7e2893a1
MP
660 if (arg)
661 mod_timer(&nbd->timeout_timer,
662 jiffies + nbd->xmit_timeout);
663 else
664 del_timer_sync(&nbd->timeout_timer);
665
7fdfd406 666 return 0;
1a2ad211 667
2f012508
PC
668 case NBD_SET_FLAGS:
669 nbd->flags = arg;
670 return 0;
671
1a2ad211 672 case NBD_DO_IT: {
1a2ad211
PM
673 int error;
674
6521d39a 675 if (nbd->task_recv)
c91192d6 676 return -EBUSY;
e2511578 677 if (!nbd->sock)
1da177e4 678 return -EINVAL;
1a2ad211 679
97240963
VN
680 /* We have to claim the device under the lock */
681 nbd->task_recv = current;
f4507164 682 mutex_unlock(&nbd->tx_lock);
1a2ad211 683
d02cf531 684 nbd_parse_flags(nbd, bdev);
a336d298 685
30d53d9c 686 nbd_dev_dbg_init(nbd);
37091fdd 687 error = nbd_thread_recv(nbd, bdev);
30d53d9c 688 nbd_dev_dbg_close(nbd);
1a2ad211 689
f4507164 690 mutex_lock(&nbd->tx_lock);
97240963 691 nbd->task_recv = NULL;
19391830 692
36e47bee 693 sock_shutdown(nbd);
f4507164 694 nbd_clear_que(nbd);
3a2d63f8 695 kill_bdev(bdev);
0e4f0f6f
MP
696 nbd_bdev_reset(bdev);
697
c378f70a 698 if (nbd->disconnect) /* user requested, ignore socket errors */
1f7b5cf1
MP
699 error = 0;
700 if (nbd->timedout)
701 error = -ETIMEDOUT;
702
0e4f0f6f
MP
703 nbd_reset(nbd);
704
19391830 705 return error;
1a2ad211
PM
706 }
707
1da177e4 708 case NBD_CLEAR_QUE:
4b2f0260
HX
709 /*
710 * This is for compatibility only. The queue is always cleared
711 * by NBD_DO_IT or NBD_CLEAR_SOCK.
712 */
1da177e4 713 return 0;
1a2ad211 714
1da177e4 715 case NBD_PRINT_DEBUG:
fd8383fd
JB
716 /*
717 * For compatibility only, we no longer keep a list of
718 * outstanding requests.
719 */
1da177e4
LT
720 return 0;
721 }
1a2ad211
PM
722 return -ENOTTY;
723}
724
/*
 * Block-device ioctl entry point: privilege check, then dispatch to
 * __nbd_ioctl() under tx_lock.
 */
static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
		     unsigned int cmd, unsigned long arg)
{
	struct nbd_device *nbd = bdev->bd_disk->private_data;
	int error;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	BUG_ON(nbd->magic != NBD_MAGIC);

	mutex_lock(&nbd->tx_lock);
	error = __nbd_ioctl(bdev, nbd, cmd, arg);
	mutex_unlock(&nbd->tx_lock);

	return error;
}
742
/* Block-device operations; the same handler serves native and compat ioctls. */
static const struct block_device_operations nbd_fops =
{
	.owner =	THIS_MODULE,
	.ioctl =	nbd_ioctl,
	.compat_ioctl =	nbd_ioctl,
};
749
30d53d9c
MP
750#if IS_ENABLED(CONFIG_DEBUG_FS)
751
/* debugfs "tasks": show pids of the attached receive/send tasks, if any. */
static int nbd_dbg_tasks_show(struct seq_file *s, void *unused)
{
	struct nbd_device *nbd = s->private;

	if (nbd->task_recv)
		seq_printf(s, "recv: %d\n", task_pid_nr(nbd->task_recv));
	if (nbd->task_send)
		seq_printf(s, "send: %d\n", task_pid_nr(nbd->task_send));

	return 0;
}
763
/* seq_file open hook for the "tasks" debugfs file. */
static int nbd_dbg_tasks_open(struct inode *inode, struct file *file)
{
	return single_open(file, nbd_dbg_tasks_show, inode->i_private);
}
768
/* File operations for the per-device "tasks" debugfs entry. */
static const struct file_operations nbd_dbg_tasks_ops = {
	.open = nbd_dbg_tasks_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
775
/* debugfs "flags": dump the raw flags word plus decoded known bits. */
static int nbd_dbg_flags_show(struct seq_file *s, void *unused)
{
	struct nbd_device *nbd = s->private;
	u32 flags = nbd->flags;

	seq_printf(s, "Hex: 0x%08x\n\n", flags);

	seq_puts(s, "Known flags:\n");

	if (flags & NBD_FLAG_HAS_FLAGS)
		seq_puts(s, "NBD_FLAG_HAS_FLAGS\n");
	if (flags & NBD_FLAG_READ_ONLY)
		seq_puts(s, "NBD_FLAG_READ_ONLY\n");
	if (flags & NBD_FLAG_SEND_FLUSH)
		seq_puts(s, "NBD_FLAG_SEND_FLUSH\n");
	if (flags & NBD_FLAG_SEND_TRIM)
		seq_puts(s, "NBD_FLAG_SEND_TRIM\n");

	return 0;
}
796
/* seq_file open hook for the "flags" debugfs file. */
static int nbd_dbg_flags_open(struct inode *inode, struct file *file)
{
	return single_open(file, nbd_dbg_flags_show, inode->i_private);
}
801
/* File operations for the per-device "flags" debugfs entry. */
static const struct file_operations nbd_dbg_flags_ops = {
	.open = nbd_dbg_flags_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
808
/*
 * Create the per-device debugfs directory (under the module root) and
 * its entries. Returns -EIO if debugfs is unavailable.
 */
static int nbd_dev_dbg_init(struct nbd_device *nbd)
{
	struct dentry *dir;

	if (!nbd_dbg_dir)
		return -EIO;

	dir = debugfs_create_dir(nbd_name(nbd), nbd_dbg_dir);
	if (!dir) {
		dev_err(nbd_to_dev(nbd), "Failed to create debugfs dir for '%s'\n",
			nbd_name(nbd));
		return -EIO;
	}
	nbd->dbg_dir = dir;

	debugfs_create_file("tasks", 0444, dir, nbd, &nbd_dbg_tasks_ops);
	debugfs_create_u64("size_bytes", 0444, dir, &nbd->bytesize);
	debugfs_create_u32("timeout", 0444, dir, &nbd->xmit_timeout);
	debugfs_create_u32("blocksize", 0444, dir, &nbd->blksize);
	debugfs_create_file("flags", 0444, dir, nbd, &nbd_dbg_flags_ops);

	return 0;
}
832
/* Remove the per-device debugfs directory and everything below it. */
static void nbd_dev_dbg_close(struct nbd_device *nbd)
{
	debugfs_remove_recursive(nbd->dbg_dir);
}
837
/* Create the module-level "nbd" debugfs root; -EIO if debugfs is disabled. */
static int nbd_dbg_init(void)
{
	struct dentry *dbg_dir;

	dbg_dir = debugfs_create_dir("nbd", NULL);
	if (!dbg_dir)
		return -EIO;

	nbd_dbg_dir = dbg_dir;

	return 0;
}
850
/* Tear down the module-level debugfs tree. */
static void nbd_dbg_close(void)
{
	debugfs_remove_recursive(nbd_dbg_dir);
}
855
856#else /* IS_ENABLED(CONFIG_DEBUG_FS) */
857
/* No-op stubs used when CONFIG_DEBUG_FS is disabled. */
static int nbd_dev_dbg_init(struct nbd_device *nbd)
{
	return 0;
}

static void nbd_dev_dbg_close(struct nbd_device *nbd)
{
}

static int nbd_dbg_init(void)
{
	return 0;
}

static void nbd_dbg_close(void)
{
}
875
876#endif
877
/*
 * blk-mq ->init_request: wire each request's PDU back to its nbd_device
 * (@data is tag_set.driver_data).
 */
static int nbd_init_request(void *data, struct request *rq,
			    unsigned int hctx_idx, unsigned int request_idx,
			    unsigned int numa_node)
{
	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(rq);

	cmd->nbd = data;
	INIT_LIST_HEAD(&cmd->list);
	return 0;
}
888
/* blk-mq operations for all nbd devices. */
static struct blk_mq_ops nbd_mq_ops = {
	.queue_rq	= nbd_queue_rq,
	.map_queue	= blk_mq_map_queue,
	.init_request	= nbd_init_request,
};
894
1da177e4
LT
895/*
896 * And here should be modules and kernel interface
897 * (Just smiley confuses emacs :-)
898 */
899
/*
 * Module init: validate the max_part/nbds_max parameters, allocate the
 * device table, set up a blk-mq tag set and queue per device, register
 * the block major, then initialize and publish each gendisk.
 */
static int __init nbd_init(void)
{
	int err = -ENOMEM;
	int i;
	int part_shift;

	/* nbd_request is a fixed-layout wire structure */
	BUILD_BUG_ON(sizeof(struct nbd_request) != 28);

	if (max_part < 0) {
		printk(KERN_ERR "nbd: max_part must be >= 0\n");
		return -EINVAL;
	}

	part_shift = 0;
	if (max_part > 0) {
		part_shift = fls(max_part);

		/*
		 * Adjust max_part according to part_shift as it is exported
		 * to user space so that user can know the max number of
		 * partition kernel should be able to manage.
		 *
		 * Note that -1 is required because partition 0 is reserved
		 * for the whole disk.
		 */
		max_part = (1UL << part_shift) - 1;
	}

	if ((1UL << part_shift) > DISK_MAX_PARTS)
		return -EINVAL;

	/* minor space must hold nbds_max devices of (1 << part_shift) minors */
	if (nbds_max > 1UL << (MINORBITS - part_shift))
		return -EINVAL;

	nbd_dev = kcalloc(nbds_max, sizeof(*nbd_dev), GFP_KERNEL);
	if (!nbd_dev)
		return -ENOMEM;

	for (i = 0; i < nbds_max; i++) {
		struct gendisk *disk = alloc_disk(1 << part_shift);
		if (!disk)
			goto out;
		nbd_dev[i].disk = disk;

		nbd_dev[i].tag_set.ops = &nbd_mq_ops;
		nbd_dev[i].tag_set.nr_hw_queues = 1;
		nbd_dev[i].tag_set.queue_depth = 128;
		nbd_dev[i].tag_set.numa_node = NUMA_NO_NODE;
		nbd_dev[i].tag_set.cmd_size = sizeof(struct nbd_cmd);
		nbd_dev[i].tag_set.flags = BLK_MQ_F_SHOULD_MERGE |
			BLK_MQ_F_SG_MERGE;
		nbd_dev[i].tag_set.driver_data = &nbd_dev[i];

		err = blk_mq_alloc_tag_set(&nbd_dev[i].tag_set);
		if (err) {
			put_disk(disk);
			goto out;
		}

		/*
		 * The new linux 2.5 block layer implementation requires
		 * every gendisk to have its very own request_queue struct.
		 * These structs are big so we dynamically allocate them.
		 */
		disk->queue = blk_mq_init_queue(&nbd_dev[i].tag_set);
		if (!disk->queue) {
			blk_mq_free_tag_set(&nbd_dev[i].tag_set);
			put_disk(disk);
			goto out;
		}

		/*
		 * Tell the block layer that we are not a rotational device
		 */
		queue_flag_set_unlocked(QUEUE_FLAG_NONROT, disk->queue);
		queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, disk->queue);
		disk->queue->limits.discard_granularity = 512;
		blk_queue_max_discard_sectors(disk->queue, UINT_MAX);
		disk->queue->limits.discard_zeroes_data = 0;
		blk_queue_max_hw_sectors(disk->queue, 65536);
		disk->queue->limits.max_sectors = 256;
	}

	if (register_blkdev(NBD_MAJOR, "nbd")) {
		err = -EIO;
		goto out;
	}

	printk(KERN_INFO "nbd: registered device at major %d\n", NBD_MAJOR);

	nbd_dbg_init();

	for (i = 0; i < nbds_max; i++) {
		struct gendisk *disk = nbd_dev[i].disk;
		nbd_dev[i].magic = NBD_MAGIC;
		spin_lock_init(&nbd_dev[i].sock_lock);
		mutex_init(&nbd_dev[i].tx_lock);
		init_timer(&nbd_dev[i].timeout_timer);
		nbd_dev[i].timeout_timer.function = nbd_xmit_timeout;
		nbd_dev[i].timeout_timer.data = (unsigned long)&nbd_dev[i];
		atomic_set(&nbd_dev[i].outstanding_cmds, 0);
		disk->major = NBD_MAJOR;
		disk->first_minor = i << part_shift;
		disk->fops = &nbd_fops;
		disk->private_data = &nbd_dev[i];
		sprintf(disk->disk_name, "nbd%d", i);
		nbd_reset(&nbd_dev[i]);
		add_disk(disk);
	}

	return 0;
out:
	/* unwind the devices already set up */
	while (i--) {
		blk_mq_free_tag_set(&nbd_dev[i].tag_set);
		blk_cleanup_queue(nbd_dev[i].disk->queue);
		put_disk(nbd_dev[i].disk);
	}
	kfree(nbd_dev);
	return err;
}
1020
/* Module exit: tear down debugfs, every disk/queue/tag set, and the major. */
static void __exit nbd_cleanup(void)
{
	int i;

	nbd_dbg_close();

	for (i = 0; i < nbds_max; i++) {
		struct gendisk *disk = nbd_dev[i].disk;
		nbd_dev[i].magic = 0;	/* invalidate the magic first */
		if (disk) {
			del_gendisk(disk);
			blk_cleanup_queue(disk->queue);
			blk_mq_free_tag_set(&nbd_dev[i].tag_set);
			put_disk(disk);
		}
	}
	unregister_blkdev(NBD_MAJOR, "nbd");
	kfree(nbd_dev);
	printk(KERN_INFO "nbd: unregistered device at major %d\n", NBD_MAJOR);
}
1041
module_init(nbd_init);
module_exit(nbd_cleanup);

MODULE_DESCRIPTION("Network Block Device");
MODULE_LICENSE("GPL");

/* Both parameters are read-only after load (0444). */
module_param(nbds_max, int, 0444);
MODULE_PARM_DESC(nbds_max, "number of network block devices to initialize (default: 16)");
module_param(max_part, int, 0444);
MODULE_PARM_DESC(max_part, "number of partitions per device (default: 0)");