]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blame - drivers/block/nbd.c
nbd: use flags instead of bool
[mirror_ubuntu-bionic-kernel.git] / drivers / block / nbd.c
CommitLineData
1da177e4
LT
1/*
2 * Network block device - make block devices work over TCP
3 *
4 * Note that you can not swap over this thing, yet. Seems to work but
5 * deadlocks sometimes - you can not swap over TCP in general.
6 *
a2531293 7 * Copyright 1997-2000, 2008 Pavel Machek <pavel@ucw.cz>
1da177e4
LT
8 * Parts copyright 2001 Steven Whitehouse <steve@chygwyn.com>
9 *
dbf492d6 10 * This file is released under GPLv2 or later.
1da177e4 11 *
dbf492d6 12 * (part of code stolen from loop.c)
1da177e4
LT
13 */
14
15#include <linux/major.h>
16
17#include <linux/blkdev.h>
18#include <linux/module.h>
19#include <linux/init.h>
20#include <linux/sched.h>
21#include <linux/fs.h>
22#include <linux/bio.h>
23#include <linux/stat.h>
24#include <linux/errno.h>
25#include <linux/file.h>
26#include <linux/ioctl.h>
2a48fc0a 27#include <linux/mutex.h>
4b2f0260
HX
28#include <linux/compiler.h>
29#include <linux/err.h>
30#include <linux/kernel.h>
5a0e3ad6 31#include <linux/slab.h>
1da177e4 32#include <net/sock.h>
91cf45f0 33#include <linux/net.h>
48cf6061 34#include <linux/kthread.h>
b9c495bb 35#include <linux/types.h>
30d53d9c 36#include <linux/debugfs.h>
fd8383fd 37#include <linux/blk-mq.h>
1da177e4 38
1da177e4
LT
39#include <asm/uaccess.h>
40#include <asm/types.h>
41
42#include <linux/nbd.h>
43
9b4a6ba9
JB
/* Bit numbers used with set_bit()/test_bit() on nbd_device.runtime_flags. */
#define NBD_TIMEDOUT			0	/* set by nbd_xmit_timeout() */
#define NBD_DISCONNECT_REQUESTED	1	/* set by the NBD_DISCONNECT ioctl */
46
13e71d69 47struct nbd_device {
22d109c1 48 u32 flags;
9b4a6ba9 49 unsigned long runtime_flags;
13e71d69
MP
50 struct socket * sock; /* If == NULL, device is not ready, yet */
51 int magic;
52
fd8383fd
JB
53 atomic_t outstanding_cmds;
54 struct blk_mq_tag_set tag_set;
13e71d69
MP
55
56 struct mutex tx_lock;
57 struct gendisk *disk;
58 int blksize;
b9c495bb 59 loff_t bytesize;
13e71d69 60 int xmit_timeout;
7e2893a1
MP
61
62 struct timer_list timeout_timer;
23272a67
MP
63 /* protects initialization and shutdown of the socket */
64 spinlock_t sock_lock;
7e2893a1
MP
65 struct task_struct *task_recv;
66 struct task_struct *task_send;
30d53d9c
MP
67
68#if IS_ENABLED(CONFIG_DEBUG_FS)
69 struct dentry *dbg_dir;
70#endif
13e71d69
MP
71};
72
fd8383fd
JB
73struct nbd_cmd {
74 struct nbd_device *nbd;
75 struct list_head list;
76};
77
30d53d9c
MP
78#if IS_ENABLED(CONFIG_DEBUG_FS)
79static struct dentry *nbd_dbg_dir;
80#endif
81
82#define nbd_name(nbd) ((nbd)->disk->disk_name)
83
f4507164 84#define NBD_MAGIC 0x68797548
1da177e4 85
9c7a4169 86static unsigned int nbds_max = 16;
20a8143e 87static struct nbd_device *nbd_dev;
d71a6d73 88static int max_part;
1da177e4 89
d18509f5 90static inline struct device *nbd_to_dev(struct nbd_device *nbd)
1da177e4 91{
d18509f5 92 return disk_to_dev(nbd->disk);
1da177e4
LT
93}
94
37091fdd
MP
95static bool nbd_is_connected(struct nbd_device *nbd)
96{
97 return !!nbd->task_recv;
98}
99
1da177e4
LT
100static const char *nbdcmd_to_ascii(int cmd)
101{
102 switch (cmd) {
103 case NBD_CMD_READ: return "read";
104 case NBD_CMD_WRITE: return "write";
105 case NBD_CMD_DISC: return "disconnect";
75f187ab 106 case NBD_CMD_FLUSH: return "flush";
a336d298 107 case NBD_CMD_TRIM: return "trim/discard";
1da177e4
LT
108 }
109 return "invalid";
110}
1da177e4 111
37091fdd
MP
112static int nbd_size_clear(struct nbd_device *nbd, struct block_device *bdev)
113{
114 bdev->bd_inode->i_size = 0;
115 set_capacity(nbd->disk, 0);
116 kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
117
118 return 0;
119}
120
121static void nbd_size_update(struct nbd_device *nbd, struct block_device *bdev)
122{
123 if (!nbd_is_connected(nbd))
124 return;
125
126 bdev->bd_inode->i_size = nbd->bytesize;
127 set_capacity(nbd->disk, nbd->bytesize >> 9);
128 kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
129}
130
131static int nbd_size_set(struct nbd_device *nbd, struct block_device *bdev,
132 int blocksize, int nr_blocks)
133{
134 int ret;
135
136 ret = set_blocksize(bdev, blocksize);
137 if (ret)
138 return ret;
139
140 nbd->blksize = blocksize;
141 nbd->bytesize = (loff_t)blocksize * (loff_t)nr_blocks;
142
143 nbd_size_update(nbd, bdev);
144
145 return 0;
146}
147
fd8383fd 148static void nbd_end_request(struct nbd_cmd *cmd)
1da177e4 149{
fd8383fd
JB
150 struct nbd_device *nbd = cmd->nbd;
151 struct request *req = blk_mq_rq_from_pdu(cmd);
097c94a4 152 int error = req->errors ? -EIO : 0;
1da177e4 153
fd8383fd 154 dev_dbg(nbd_to_dev(nbd), "request %p: %s\n", cmd,
d18509f5 155 error ? "failed" : "done");
1da177e4 156
fd8383fd
JB
157 atomic_dec(&nbd->outstanding_cmds);
158 blk_mq_complete_request(req, error);
1da177e4
LT
159}
160
e018e757
MP
161/*
162 * Forcibly shutdown the socket causing all listeners to error
163 */
36e47bee 164static void sock_shutdown(struct nbd_device *nbd)
7fdfd406 165{
c2611898
JB
166 struct socket *sock;
167
23272a67
MP
168 spin_lock_irq(&nbd->sock_lock);
169
170 if (!nbd->sock) {
171 spin_unlock_irq(&nbd->sock_lock);
260bbce4 172 return;
23272a67 173 }
260bbce4 174
c2611898 175 sock = nbd->sock;
260bbce4 176 dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n");
260bbce4 177 nbd->sock = NULL;
23272a67
MP
178 spin_unlock_irq(&nbd->sock_lock);
179
c2611898
JB
180 kernel_sock_shutdown(sock, SHUT_RDWR);
181 sockfd_put(sock);
182
23272a67 183 del_timer(&nbd->timeout_timer);
7fdfd406
PC
184}
185
186static void nbd_xmit_timeout(unsigned long arg)
187{
7e2893a1 188 struct nbd_device *nbd = (struct nbd_device *)arg;
c2611898 189 struct socket *sock = NULL;
dcc909d9 190 unsigned long flags;
7e2893a1 191
fd8383fd 192 if (!atomic_read(&nbd->outstanding_cmds))
7e2893a1
MP
193 return;
194
23272a67 195 spin_lock_irqsave(&nbd->sock_lock, flags);
dcc909d9 196
9b4a6ba9 197 set_bit(NBD_TIMEDOUT, &nbd->runtime_flags);
7fdfd406 198
c2611898
JB
199 if (nbd->sock) {
200 sock = nbd->sock;
201 get_file(sock->file);
202 }
7e2893a1 203
23272a67 204 spin_unlock_irqrestore(&nbd->sock_lock, flags);
c2611898
JB
205 if (sock) {
206 kernel_sock_shutdown(sock, SHUT_RDWR);
207 sockfd_put(sock);
208 }
dcc909d9 209
23272a67 210 dev_err(nbd_to_dev(nbd), "Connection timed out, shutting down connection\n");
7fdfd406
PC
211}
212
1da177e4
LT
213/*
214 * Send or receive packet.
215 */
f4507164 216static int sock_xmit(struct nbd_device *nbd, int send, void *buf, int size,
1da177e4
LT
217 int msg_flags)
218{
f4507164 219 struct socket *sock = nbd->sock;
1da177e4
LT
220 int result;
221 struct msghdr msg;
222 struct kvec iov;
7f338fe4 223 unsigned long pflags = current->flags;
1da177e4 224
ffc41cf8 225 if (unlikely(!sock)) {
f4507164 226 dev_err(disk_to_dev(nbd->disk),
7f1b90f9
WC
227 "Attempted %s on closed socket in sock_xmit\n",
228 (send ? "send" : "recv"));
ffc41cf8
MS
229 return -EINVAL;
230 }
231
7f338fe4 232 current->flags |= PF_MEMALLOC;
1da177e4 233 do {
7f338fe4 234 sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC;
1da177e4
LT
235 iov.iov_base = buf;
236 iov.iov_len = size;
237 msg.msg_name = NULL;
238 msg.msg_namelen = 0;
239 msg.msg_control = NULL;
240 msg.msg_controllen = 0;
1da177e4
LT
241 msg.msg_flags = msg_flags | MSG_NOSIGNAL;
242
7e2893a1 243 if (send)
1da177e4 244 result = kernel_sendmsg(sock, &msg, &iov, 1, size);
7e2893a1 245 else
35fbf5bc
NK
246 result = kernel_recvmsg(sock, &msg, &iov, 1, size,
247 msg.msg_flags);
1da177e4 248
1da177e4
LT
249 if (result <= 0) {
250 if (result == 0)
251 result = -EPIPE; /* short read */
252 break;
253 }
254 size -= result;
255 buf += result;
256 } while (size > 0);
257
7f338fe4 258 tsk_restore_flags(current, pflags, PF_MEMALLOC);
1da177e4 259
7e2893a1
MP
260 if (!send && nbd->xmit_timeout)
261 mod_timer(&nbd->timeout_timer, jiffies + nbd->xmit_timeout);
262
1da177e4
LT
263 return result;
264}
265
f4507164 266static inline int sock_send_bvec(struct nbd_device *nbd, struct bio_vec *bvec,
1da177e4
LT
267 int flags)
268{
269 int result;
270 void *kaddr = kmap(bvec->bv_page);
f4507164
WG
271 result = sock_xmit(nbd, 1, kaddr + bvec->bv_offset,
272 bvec->bv_len, flags);
1da177e4
LT
273 kunmap(bvec->bv_page);
274 return result;
275}
276
7fdfd406 277/* always call with the tx_lock held */
fd8383fd 278static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd)
1da177e4 279{
fd8383fd 280 struct request *req = blk_mq_rq_from_pdu(cmd);
5705f702 281 int result, flags;
1da177e4 282 struct nbd_request request;
1011c1b9 283 unsigned long size = blk_rq_bytes(req);
9dc6c806
CH
284 u32 type;
285
286 if (req->cmd_type == REQ_TYPE_DRV_PRIV)
287 type = NBD_CMD_DISC;
c2df40df 288 else if (req_op(req) == REQ_OP_DISCARD)
9dc6c806 289 type = NBD_CMD_TRIM;
3a5e02ce 290 else if (req_op(req) == REQ_OP_FLUSH)
9dc6c806
CH
291 type = NBD_CMD_FLUSH;
292 else if (rq_data_dir(req) == WRITE)
293 type = NBD_CMD_WRITE;
294 else
295 type = NBD_CMD_READ;
1da177e4 296
04cfac4e 297 memset(&request, 0, sizeof(request));
1da177e4 298 request.magic = htonl(NBD_REQUEST_MAGIC);
9dc6c806
CH
299 request.type = htonl(type);
300 if (type != NBD_CMD_FLUSH && type != NBD_CMD_DISC) {
75f187ab
AB
301 request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
302 request.len = htonl(size);
303 }
fd8383fd 304 memcpy(request.handle, &req->tag, sizeof(req->tag));
1da177e4 305
d18509f5 306 dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n",
fd8383fd 307 cmd, nbdcmd_to_ascii(type),
d18509f5 308 (unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req));
f4507164 309 result = sock_xmit(nbd, 1, &request, sizeof(request),
9dc6c806 310 (type == NBD_CMD_WRITE) ? MSG_MORE : 0);
1da177e4 311 if (result <= 0) {
f4507164 312 dev_err(disk_to_dev(nbd->disk),
7f1b90f9 313 "Send control failed (result %d)\n", result);
dab5313a 314 return -EIO;
1da177e4
LT
315 }
316
9dc6c806 317 if (type == NBD_CMD_WRITE) {
5705f702 318 struct req_iterator iter;
7988613b 319 struct bio_vec bvec;
1da177e4
LT
320 /*
321 * we are really probing at internals to determine
322 * whether to set MSG_MORE or not...
323 */
5705f702 324 rq_for_each_segment(bvec, req, iter) {
6c92e699 325 flags = 0;
4550dd6c 326 if (!rq_iter_last(bvec, iter))
6c92e699 327 flags = MSG_MORE;
d18509f5 328 dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n",
fd8383fd 329 cmd, bvec.bv_len);
7988613b 330 result = sock_send_bvec(nbd, &bvec, flags);
6c92e699 331 if (result <= 0) {
f4507164 332 dev_err(disk_to_dev(nbd->disk),
7f1b90f9
WC
333 "Send data failed (result %d)\n",
334 result);
dab5313a 335 return -EIO;
6c92e699 336 }
1da177e4
LT
337 }
338 }
1da177e4 339 return 0;
1da177e4
LT
340}
341
f4507164 342static inline int sock_recv_bvec(struct nbd_device *nbd, struct bio_vec *bvec)
1da177e4
LT
343{
344 int result;
345 void *kaddr = kmap(bvec->bv_page);
f4507164 346 result = sock_xmit(nbd, 0, kaddr + bvec->bv_offset, bvec->bv_len,
1da177e4
LT
347 MSG_WAITALL);
348 kunmap(bvec->bv_page);
349 return result;
350}
351
352/* NULL returned = something went wrong, inform userspace */
fd8383fd 353static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd)
1da177e4
LT
354{
355 int result;
356 struct nbd_reply reply;
fd8383fd
JB
357 struct nbd_cmd *cmd;
358 struct request *req = NULL;
359 u16 hwq;
360 int tag;
1da177e4
LT
361
362 reply.magic = 0;
f4507164 363 result = sock_xmit(nbd, 0, &reply, sizeof(reply), MSG_WAITALL);
1da177e4 364 if (result <= 0) {
f4507164 365 dev_err(disk_to_dev(nbd->disk),
7f1b90f9 366 "Receive control failed (result %d)\n", result);
19391830 367 return ERR_PTR(result);
1da177e4 368 }
e4b57e08
MF
369
370 if (ntohl(reply.magic) != NBD_REPLY_MAGIC) {
f4507164 371 dev_err(disk_to_dev(nbd->disk), "Wrong magic (0x%lx)\n",
e4b57e08 372 (unsigned long)ntohl(reply.magic));
19391830 373 return ERR_PTR(-EPROTO);
e4b57e08
MF
374 }
375
fd8383fd 376 memcpy(&tag, reply.handle, sizeof(int));
4b2f0260 377
fd8383fd
JB
378 hwq = blk_mq_unique_tag_to_hwq(tag);
379 if (hwq < nbd->tag_set.nr_hw_queues)
380 req = blk_mq_tag_to_rq(nbd->tag_set.tags[hwq],
381 blk_mq_unique_tag_to_tag(tag));
382 if (!req || !blk_mq_request_started(req)) {
383 dev_err(disk_to_dev(nbd->disk), "Unexpected reply (%d) %p\n",
384 tag, req);
385 return ERR_PTR(-ENOENT);
1da177e4 386 }
fd8383fd 387 cmd = blk_mq_rq_to_pdu(req);
1da177e4 388
1da177e4 389 if (ntohl(reply.error)) {
f4507164 390 dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n",
7f1b90f9 391 ntohl(reply.error));
1da177e4 392 req->errors++;
fd8383fd 393 return cmd;
1da177e4
LT
394 }
395
fd8383fd 396 dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", cmd);
9dc6c806 397 if (rq_data_dir(req) != WRITE) {
5705f702 398 struct req_iterator iter;
7988613b 399 struct bio_vec bvec;
5705f702
N
400
401 rq_for_each_segment(bvec, req, iter) {
7988613b 402 result = sock_recv_bvec(nbd, &bvec);
6c92e699 403 if (result <= 0) {
f4507164 404 dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
7f1b90f9 405 result);
6c92e699 406 req->errors++;
fd8383fd 407 return cmd;
6c92e699 408 }
d18509f5 409 dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n",
fd8383fd 410 cmd, bvec.bv_len);
1da177e4
LT
411 }
412 }
fd8383fd 413 return cmd;
1da177e4
LT
414}
415
edfaa7c3
KS
416static ssize_t pid_show(struct device *dev,
417 struct device_attribute *attr, char *buf)
6b39bb65 418{
edfaa7c3 419 struct gendisk *disk = dev_to_disk(dev);
6521d39a 420 struct nbd_device *nbd = (struct nbd_device *)disk->private_data;
edfaa7c3 421
6521d39a 422 return sprintf(buf, "%d\n", task_pid_nr(nbd->task_recv));
6b39bb65
PC
423}
424
edfaa7c3 425static struct device_attribute pid_attr = {
01e8ef11 426 .attr = { .name = "pid", .mode = S_IRUGO},
6b39bb65
PC
427 .show = pid_show,
428};
429
37091fdd 430static int nbd_thread_recv(struct nbd_device *nbd, struct block_device *bdev)
1da177e4 431{
fd8383fd 432 struct nbd_cmd *cmd;
84963048 433 int ret;
1da177e4 434
f4507164 435 BUG_ON(nbd->magic != NBD_MAGIC);
1da177e4 436
7f338fe4 437 sk_set_memalloc(nbd->sock->sk);
6521d39a 438
f4507164 439 ret = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
84963048 440 if (ret) {
f4507164 441 dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
84963048
WC
442 return ret;
443 }
6b39bb65 444
37091fdd
MP
445 nbd_size_update(nbd, bdev);
446
19391830 447 while (1) {
fd8383fd
JB
448 cmd = nbd_read_stat(nbd);
449 if (IS_ERR(cmd)) {
450 ret = PTR_ERR(cmd);
19391830
MP
451 break;
452 }
453
fd8383fd 454 nbd_end_request(cmd);
19391830 455 }
6b39bb65 456
37091fdd
MP
457 nbd_size_clear(nbd, bdev);
458
6521d39a 459 device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
7e2893a1 460 return ret;
1da177e4
LT
461}
462
fd8383fd 463static void nbd_clear_req(struct request *req, void *data, bool reserved)
1da177e4 464{
fd8383fd 465 struct nbd_cmd *cmd;
1da177e4 466
fd8383fd
JB
467 if (!blk_mq_request_started(req))
468 return;
469 cmd = blk_mq_rq_to_pdu(req);
470 req->errors++;
471 nbd_end_request(cmd);
472}
473
474static void nbd_clear_que(struct nbd_device *nbd)
475{
f4507164 476 BUG_ON(nbd->magic != NBD_MAGIC);
1da177e4 477
4b2f0260 478 /*
f4507164 479 * Because we have set nbd->sock to NULL under the tx_lock, all
fd8383fd 480 * modifications to the list must have completed by now.
4b2f0260 481 */
f4507164 482 BUG_ON(nbd->sock);
4b2f0260 483
fd8383fd 484 blk_mq_tagset_busy_iter(&nbd->tag_set, nbd_clear_req, NULL);
e78273c8 485 dev_dbg(disk_to_dev(nbd->disk), "queue cleared\n");
1da177e4
LT
486}
487
7fdfd406 488
fd8383fd 489static void nbd_handle_cmd(struct nbd_cmd *cmd)
48cf6061 490{
fd8383fd
JB
491 struct request *req = blk_mq_rq_from_pdu(cmd);
492 struct nbd_device *nbd = cmd->nbd;
493
33659ebb 494 if (req->cmd_type != REQ_TYPE_FS)
48cf6061
LV
495 goto error_out;
496
9dc6c806
CH
497 if (rq_data_dir(req) == WRITE &&
498 (nbd->flags & NBD_FLAG_READ_ONLY)) {
499 dev_err(disk_to_dev(nbd->disk),
500 "Write on read-only\n");
501 goto error_out;
75f187ab
AB
502 }
503
48cf6061
LV
504 req->errors = 0;
505
f4507164 506 mutex_lock(&nbd->tx_lock);
fd8383fd 507 nbd->task_send = current;
f4507164
WG
508 if (unlikely(!nbd->sock)) {
509 mutex_unlock(&nbd->tx_lock);
510 dev_err(disk_to_dev(nbd->disk),
7f1b90f9 511 "Attempted send on closed socket\n");
15746fca 512 goto error_out;
48cf6061
LV
513 }
514
fd8383fd 515 if (nbd->xmit_timeout && !atomic_read(&nbd->outstanding_cmds))
7e2893a1
MP
516 mod_timer(&nbd->timeout_timer, jiffies + nbd->xmit_timeout);
517
fd8383fd
JB
518 atomic_inc(&nbd->outstanding_cmds);
519 if (nbd_send_cmd(nbd, cmd) != 0) {
f4507164 520 dev_err(disk_to_dev(nbd->disk), "Request send failed\n");
48cf6061 521 req->errors++;
fd8383fd 522 nbd_end_request(cmd);
48cf6061
LV
523 }
524
fd8383fd 525 nbd->task_send = NULL;
f4507164 526 mutex_unlock(&nbd->tx_lock);
48cf6061
LV
527
528 return;
529
530error_out:
531 req->errors++;
fd8383fd 532 nbd_end_request(cmd);
48cf6061
LV
533}
534
fd8383fd
JB
535static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
536 const struct blk_mq_queue_data *bd)
1da177e4 537{
fd8383fd 538 struct nbd_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
1da177e4 539
fd8383fd
JB
540 blk_mq_start_request(bd->rq);
541 nbd_handle_cmd(cmd);
542 return BLK_MQ_RQ_QUEUE_OK;
1da177e4
LT
543}
544
23272a67
MP
545static int nbd_set_socket(struct nbd_device *nbd, struct socket *sock)
546{
547 int ret = 0;
548
549 spin_lock_irq(&nbd->sock_lock);
550
551 if (nbd->sock) {
552 ret = -EBUSY;
553 goto out;
554 }
555
556 nbd->sock = sock;
557
558out:
559 spin_unlock_irq(&nbd->sock_lock);
560
561 return ret;
562}
563
0e4f0f6f
MP
564/* Reset all properties of an NBD device */
565static void nbd_reset(struct nbd_device *nbd)
566{
9b4a6ba9 567 nbd->runtime_flags = 0;
0e4f0f6f
MP
568 nbd->blksize = 1024;
569 nbd->bytesize = 0;
570 set_capacity(nbd->disk, 0);
571 nbd->flags = 0;
572 nbd->xmit_timeout = 0;
573 queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
574 del_timer_sync(&nbd->timeout_timer);
575}
576
577static void nbd_bdev_reset(struct block_device *bdev)
578{
579 set_device_ro(bdev, false);
580 bdev->bd_inode->i_size = 0;
581 if (max_part > 0) {
582 blkdev_reread_part(bdev);
583 bdev->bd_invalidated = 1;
584 }
585}
586
d02cf531
MP
587static void nbd_parse_flags(struct nbd_device *nbd, struct block_device *bdev)
588{
589 if (nbd->flags & NBD_FLAG_READ_ONLY)
590 set_device_ro(bdev, true);
591 if (nbd->flags & NBD_FLAG_SEND_TRIM)
592 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
593 if (nbd->flags & NBD_FLAG_SEND_FLUSH)
aafb1eec 594 blk_queue_write_cache(nbd->disk->queue, true, false);
d02cf531 595 else
aafb1eec 596 blk_queue_write_cache(nbd->disk->queue, false, false);
d02cf531
MP
597}
598
30d53d9c
MP
/* Per-device debugfs hooks; defined further down in this file. */
static int nbd_dev_dbg_init(struct nbd_device *nbd);
static void nbd_dev_dbg_close(struct nbd_device *nbd);
601
1a2ad211 602/* Must be called with tx_lock held */
1da177e4 603
f4507164 604static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
1a2ad211
PM
605 unsigned int cmd, unsigned long arg)
606{
1da177e4 607 switch (cmd) {
1a2ad211 608 case NBD_DISCONNECT: {
fd8383fd 609 struct request *sreq;
1a2ad211 610
f4507164 611 dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
3a2d63f8
PB
612 if (!nbd->sock)
613 return -EINVAL;
1a2ad211 614
fd8383fd
JB
615 sreq = blk_mq_alloc_request(bdev_get_queue(bdev), WRITE, 0);
616 if (!sreq)
617 return -ENOMEM;
618
3a2d63f8
PB
619 mutex_unlock(&nbd->tx_lock);
620 fsync_bdev(bdev);
621 mutex_lock(&nbd->tx_lock);
fd8383fd 622 sreq->cmd_type = REQ_TYPE_DRV_PRIV;
3a2d63f8
PB
623
624 /* Check again after getting mutex back. */
fd8383fd
JB
625 if (!nbd->sock) {
626 blk_mq_free_request(sreq);
1da177e4 627 return -EINVAL;
fd8383fd 628 }
3a2d63f8 629
9b4a6ba9 630 set_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags);
c378f70a 631
fd8383fd
JB
632 nbd_send_cmd(nbd, blk_mq_rq_to_pdu(sreq));
633 blk_mq_free_request(sreq);
c378f70a 634 return 0;
1a2ad211 635 }
1da177e4 636
23272a67
MP
637 case NBD_CLEAR_SOCK:
638 sock_shutdown(nbd);
f4507164 639 nbd_clear_que(nbd);
3a2d63f8 640 kill_bdev(bdev);
1a2ad211 641 return 0;
1a2ad211
PM
642
643 case NBD_SET_SOCK: {
e2511578 644 int err;
23272a67
MP
645 struct socket *sock = sockfd_lookup(arg, &err);
646
647 if (!sock)
648 return err;
649
650 err = nbd_set_socket(nbd, sock);
651 if (!err && max_part)
652 bdev->bd_invalidated = 1;
653
654 return err;
1a2ad211
PM
655 }
656
37091fdd 657 case NBD_SET_BLKSIZE: {
5e454c67 658 loff_t bsize = div_s64(nbd->bytesize, arg);
37091fdd
MP
659
660 return nbd_size_set(nbd, bdev, arg, bsize);
661 }
1a2ad211 662
1da177e4 663 case NBD_SET_SIZE:
37091fdd
MP
664 return nbd_size_set(nbd, bdev, nbd->blksize,
665 arg / nbd->blksize);
666
667 case NBD_SET_SIZE_BLOCKS:
668 return nbd_size_set(nbd, bdev, nbd->blksize, arg);
1a2ad211 669
7fdfd406 670 case NBD_SET_TIMEOUT:
f4507164 671 nbd->xmit_timeout = arg * HZ;
7e2893a1
MP
672 if (arg)
673 mod_timer(&nbd->timeout_timer,
674 jiffies + nbd->xmit_timeout);
675 else
676 del_timer_sync(&nbd->timeout_timer);
677
7fdfd406 678 return 0;
1a2ad211 679
2f012508
PC
680 case NBD_SET_FLAGS:
681 nbd->flags = arg;
682 return 0;
683
1a2ad211 684 case NBD_DO_IT: {
1a2ad211
PM
685 int error;
686
6521d39a 687 if (nbd->task_recv)
c91192d6 688 return -EBUSY;
e2511578 689 if (!nbd->sock)
1da177e4 690 return -EINVAL;
1a2ad211 691
97240963
VN
692 /* We have to claim the device under the lock */
693 nbd->task_recv = current;
f4507164 694 mutex_unlock(&nbd->tx_lock);
1a2ad211 695
d02cf531 696 nbd_parse_flags(nbd, bdev);
a336d298 697
30d53d9c 698 nbd_dev_dbg_init(nbd);
37091fdd 699 error = nbd_thread_recv(nbd, bdev);
30d53d9c 700 nbd_dev_dbg_close(nbd);
1a2ad211 701
f4507164 702 mutex_lock(&nbd->tx_lock);
97240963 703 nbd->task_recv = NULL;
19391830 704
36e47bee 705 sock_shutdown(nbd);
f4507164 706 nbd_clear_que(nbd);
3a2d63f8 707 kill_bdev(bdev);
0e4f0f6f
MP
708 nbd_bdev_reset(bdev);
709
9b4a6ba9
JB
710 /* user requested, ignore socket errors */
711 if (test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags))
1f7b5cf1 712 error = 0;
9b4a6ba9 713 if (test_bit(NBD_TIMEDOUT, &nbd->runtime_flags))
1f7b5cf1
MP
714 error = -ETIMEDOUT;
715
0e4f0f6f
MP
716 nbd_reset(nbd);
717
19391830 718 return error;
1a2ad211
PM
719 }
720
1da177e4 721 case NBD_CLEAR_QUE:
4b2f0260
HX
722 /*
723 * This is for compatibility only. The queue is always cleared
724 * by NBD_DO_IT or NBD_CLEAR_SOCK.
725 */
1da177e4 726 return 0;
1a2ad211 727
1da177e4 728 case NBD_PRINT_DEBUG:
fd8383fd
JB
729 /*
730 * For compatibility only, we no longer keep a list of
731 * outstanding requests.
732 */
1da177e4
LT
733 return 0;
734 }
1a2ad211
PM
735 return -ENOTTY;
736}
737
738static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
739 unsigned int cmd, unsigned long arg)
740{
f4507164 741 struct nbd_device *nbd = bdev->bd_disk->private_data;
1a2ad211
PM
742 int error;
743
744 if (!capable(CAP_SYS_ADMIN))
745 return -EPERM;
746
f4507164 747 BUG_ON(nbd->magic != NBD_MAGIC);
1a2ad211 748
f4507164
WG
749 mutex_lock(&nbd->tx_lock);
750 error = __nbd_ioctl(bdev, nbd, cmd, arg);
751 mutex_unlock(&nbd->tx_lock);
1a2ad211
PM
752
753 return error;
1da177e4
LT
754}
755
83d5cde4 756static const struct block_device_operations nbd_fops =
1da177e4
LT
757{
758 .owner = THIS_MODULE,
8a6cfeb6 759 .ioctl = nbd_ioctl,
263a3df1 760 .compat_ioctl = nbd_ioctl,
1da177e4
LT
761};
762
30d53d9c
MP
763#if IS_ENABLED(CONFIG_DEBUG_FS)
764
765static int nbd_dbg_tasks_show(struct seq_file *s, void *unused)
766{
767 struct nbd_device *nbd = s->private;
768
769 if (nbd->task_recv)
770 seq_printf(s, "recv: %d\n", task_pid_nr(nbd->task_recv));
771 if (nbd->task_send)
772 seq_printf(s, "send: %d\n", task_pid_nr(nbd->task_send));
773
774 return 0;
775}
776
777static int nbd_dbg_tasks_open(struct inode *inode, struct file *file)
778{
779 return single_open(file, nbd_dbg_tasks_show, inode->i_private);
780}
781
782static const struct file_operations nbd_dbg_tasks_ops = {
783 .open = nbd_dbg_tasks_open,
784 .read = seq_read,
785 .llseek = seq_lseek,
786 .release = single_release,
787};
788
789static int nbd_dbg_flags_show(struct seq_file *s, void *unused)
790{
791 struct nbd_device *nbd = s->private;
792 u32 flags = nbd->flags;
793
794 seq_printf(s, "Hex: 0x%08x\n\n", flags);
795
796 seq_puts(s, "Known flags:\n");
797
798 if (flags & NBD_FLAG_HAS_FLAGS)
799 seq_puts(s, "NBD_FLAG_HAS_FLAGS\n");
800 if (flags & NBD_FLAG_READ_ONLY)
801 seq_puts(s, "NBD_FLAG_READ_ONLY\n");
802 if (flags & NBD_FLAG_SEND_FLUSH)
803 seq_puts(s, "NBD_FLAG_SEND_FLUSH\n");
804 if (flags & NBD_FLAG_SEND_TRIM)
805 seq_puts(s, "NBD_FLAG_SEND_TRIM\n");
806
807 return 0;
808}
809
810static int nbd_dbg_flags_open(struct inode *inode, struct file *file)
811{
812 return single_open(file, nbd_dbg_flags_show, inode->i_private);
813}
814
815static const struct file_operations nbd_dbg_flags_ops = {
816 .open = nbd_dbg_flags_open,
817 .read = seq_read,
818 .llseek = seq_lseek,
819 .release = single_release,
820};
821
822static int nbd_dev_dbg_init(struct nbd_device *nbd)
823{
824 struct dentry *dir;
27ea43fe
MP
825
826 if (!nbd_dbg_dir)
827 return -EIO;
30d53d9c
MP
828
829 dir = debugfs_create_dir(nbd_name(nbd), nbd_dbg_dir);
27ea43fe
MP
830 if (!dir) {
831 dev_err(nbd_to_dev(nbd), "Failed to create debugfs dir for '%s'\n",
832 nbd_name(nbd));
833 return -EIO;
30d53d9c
MP
834 }
835 nbd->dbg_dir = dir;
836
27ea43fe
MP
837 debugfs_create_file("tasks", 0444, dir, nbd, &nbd_dbg_tasks_ops);
838 debugfs_create_u64("size_bytes", 0444, dir, &nbd->bytesize);
839 debugfs_create_u32("timeout", 0444, dir, &nbd->xmit_timeout);
840 debugfs_create_u32("blocksize", 0444, dir, &nbd->blksize);
d366a0ff 841 debugfs_create_file("flags", 0444, dir, nbd, &nbd_dbg_flags_ops);
30d53d9c
MP
842
843 return 0;
844}
845
846static void nbd_dev_dbg_close(struct nbd_device *nbd)
847{
848 debugfs_remove_recursive(nbd->dbg_dir);
849}
850
851static int nbd_dbg_init(void)
852{
853 struct dentry *dbg_dir;
854
855 dbg_dir = debugfs_create_dir("nbd", NULL);
27ea43fe
MP
856 if (!dbg_dir)
857 return -EIO;
30d53d9c
MP
858
859 nbd_dbg_dir = dbg_dir;
860
861 return 0;
862}
863
864static void nbd_dbg_close(void)
865{
866 debugfs_remove_recursive(nbd_dbg_dir);
867}
868
869#else /* IS_ENABLED(CONFIG_DEBUG_FS) */
870
/* No-op variants of the debugfs hooks for builds without CONFIG_DEBUG_FS. */

static int nbd_dev_dbg_init(struct nbd_device *nbd)
{
	return 0;
}

static void nbd_dev_dbg_close(struct nbd_device *nbd)
{
}

static int nbd_dbg_init(void)
{
	return 0;
}

static void nbd_dbg_close(void)
{
}
888
889#endif
890
fd8383fd
JB
891static int nbd_init_request(void *data, struct request *rq,
892 unsigned int hctx_idx, unsigned int request_idx,
893 unsigned int numa_node)
894{
895 struct nbd_cmd *cmd = blk_mq_rq_to_pdu(rq);
896
897 cmd->nbd = data;
898 INIT_LIST_HEAD(&cmd->list);
899 return 0;
900}
901
902static struct blk_mq_ops nbd_mq_ops = {
903 .queue_rq = nbd_queue_rq,
904 .map_queue = blk_mq_map_queue,
905 .init_request = nbd_init_request,
906};
907
1da177e4
LT
908/*
909 * And here should be modules and kernel interface
910 * (Just smiley confuses emacs :-)
911 */
912
913static int __init nbd_init(void)
914{
915 int err = -ENOMEM;
916 int i;
d71a6d73 917 int part_shift;
1da177e4 918
5b7b18cc 919 BUILD_BUG_ON(sizeof(struct nbd_request) != 28);
1da177e4 920
d71a6d73 921 if (max_part < 0) {
7742ce4a 922 printk(KERN_ERR "nbd: max_part must be >= 0\n");
d71a6d73
LV
923 return -EINVAL;
924 }
925
926 part_shift = 0;
5988ce23 927 if (max_part > 0) {
d71a6d73
LV
928 part_shift = fls(max_part);
929
5988ce23
NK
930 /*
931 * Adjust max_part according to part_shift as it is exported
932 * to user space so that user can know the max number of
933 * partition kernel should be able to manage.
934 *
935 * Note that -1 is required because partition 0 is reserved
936 * for the whole disk.
937 */
938 max_part = (1UL << part_shift) - 1;
939 }
940
3b271082
NK
941 if ((1UL << part_shift) > DISK_MAX_PARTS)
942 return -EINVAL;
943
944 if (nbds_max > 1UL << (MINORBITS - part_shift))
945 return -EINVAL;
946
ff6b8090
SM
947 nbd_dev = kcalloc(nbds_max, sizeof(*nbd_dev), GFP_KERNEL);
948 if (!nbd_dev)
949 return -ENOMEM;
950
40be0c28 951 for (i = 0; i < nbds_max; i++) {
d71a6d73 952 struct gendisk *disk = alloc_disk(1 << part_shift);
1da177e4
LT
953 if (!disk)
954 goto out;
955 nbd_dev[i].disk = disk;
fd8383fd
JB
956
957 nbd_dev[i].tag_set.ops = &nbd_mq_ops;
958 nbd_dev[i].tag_set.nr_hw_queues = 1;
959 nbd_dev[i].tag_set.queue_depth = 128;
960 nbd_dev[i].tag_set.numa_node = NUMA_NO_NODE;
961 nbd_dev[i].tag_set.cmd_size = sizeof(struct nbd_cmd);
962 nbd_dev[i].tag_set.flags = BLK_MQ_F_SHOULD_MERGE |
963 BLK_MQ_F_SG_MERGE;
964 nbd_dev[i].tag_set.driver_data = &nbd_dev[i];
965
966 err = blk_mq_alloc_tag_set(&nbd_dev[i].tag_set);
967 if (err) {
968 put_disk(disk);
969 goto out;
970 }
971
1da177e4
LT
972 /*
973 * The new linux 2.5 block layer implementation requires
974 * every gendisk to have its very own request_queue struct.
975 * These structs are big so we dynamically allocate them.
976 */
fd8383fd 977 disk->queue = blk_mq_init_queue(&nbd_dev[i].tag_set);
1da177e4 978 if (!disk->queue) {
fd8383fd 979 blk_mq_free_tag_set(&nbd_dev[i].tag_set);
1da177e4
LT
980 put_disk(disk);
981 goto out;
982 }
fd8383fd 983
31dcfab0
JA
984 /*
985 * Tell the block layer that we are not a rotational device
986 */
987 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, disk->queue);
b277da0a 988 queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, disk->queue);
a336d298 989 disk->queue->limits.discard_granularity = 512;
2bb4cd5c 990 blk_queue_max_discard_sectors(disk->queue, UINT_MAX);
a336d298 991 disk->queue->limits.discard_zeroes_data = 0;
078be02b
MB
992 blk_queue_max_hw_sectors(disk->queue, 65536);
993 disk->queue->limits.max_sectors = 256;
1da177e4
LT
994 }
995
996 if (register_blkdev(NBD_MAJOR, "nbd")) {
997 err = -EIO;
998 goto out;
999 }
1000
1001 printk(KERN_INFO "nbd: registered device at major %d\n", NBD_MAJOR);
1da177e4 1002
30d53d9c
MP
1003 nbd_dbg_init();
1004
40be0c28 1005 for (i = 0; i < nbds_max; i++) {
1da177e4 1006 struct gendisk *disk = nbd_dev[i].disk;
f4507164 1007 nbd_dev[i].magic = NBD_MAGIC;
23272a67 1008 spin_lock_init(&nbd_dev[i].sock_lock);
82d4dc5a 1009 mutex_init(&nbd_dev[i].tx_lock);
7e2893a1
MP
1010 init_timer(&nbd_dev[i].timeout_timer);
1011 nbd_dev[i].timeout_timer.function = nbd_xmit_timeout;
1012 nbd_dev[i].timeout_timer.data = (unsigned long)&nbd_dev[i];
fd8383fd 1013 atomic_set(&nbd_dev[i].outstanding_cmds, 0);
1da177e4 1014 disk->major = NBD_MAJOR;
d71a6d73 1015 disk->first_minor = i << part_shift;
1da177e4
LT
1016 disk->fops = &nbd_fops;
1017 disk->private_data = &nbd_dev[i];
1da177e4 1018 sprintf(disk->disk_name, "nbd%d", i);
0e4f0f6f 1019 nbd_reset(&nbd_dev[i]);
1da177e4
LT
1020 add_disk(disk);
1021 }
1022
1023 return 0;
1024out:
1025 while (i--) {
fd8383fd 1026 blk_mq_free_tag_set(&nbd_dev[i].tag_set);
1da177e4
LT
1027 blk_cleanup_queue(nbd_dev[i].disk->queue);
1028 put_disk(nbd_dev[i].disk);
1029 }
f3944d61 1030 kfree(nbd_dev);
1da177e4
LT
1031 return err;
1032}
1033
1034static void __exit nbd_cleanup(void)
1035{
1036 int i;
30d53d9c
MP
1037
1038 nbd_dbg_close();
1039
40be0c28 1040 for (i = 0; i < nbds_max; i++) {
1da177e4 1041 struct gendisk *disk = nbd_dev[i].disk;
40be0c28 1042 nbd_dev[i].magic = 0;
1da177e4
LT
1043 if (disk) {
1044 del_gendisk(disk);
1045 blk_cleanup_queue(disk->queue);
fd8383fd 1046 blk_mq_free_tag_set(&nbd_dev[i].tag_set);
1da177e4
LT
1047 put_disk(disk);
1048 }
1049 }
1da177e4 1050 unregister_blkdev(NBD_MAJOR, "nbd");
f3944d61 1051 kfree(nbd_dev);
1da177e4
LT
1052 printk(KERN_INFO "nbd: unregistered device at major %d\n", NBD_MAJOR);
1053}
1054
1055module_init(nbd_init);
1056module_exit(nbd_cleanup);
1057
1058MODULE_DESCRIPTION("Network Block Device");
1059MODULE_LICENSE("GPL");
1060
40be0c28 1061module_param(nbds_max, int, 0444);
d71a6d73
LV
1062MODULE_PARM_DESC(nbds_max, "number of network block devices to initialize (default: 16)");
1063module_param(max_part, int, 0444);
1064MODULE_PARM_DESC(max_part, "number of partitions per device (default: 0)");