]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blame - drivers/block/virtio_blk.c
tools/virtio: make vringh_test use inbuf/outbuf.
[mirror_ubuntu-bionic-kernel.git] / drivers / block / virtio_blk.c
CommitLineData
e467cde2
RR
1//#define DEBUG
2#include <linux/spinlock.h>
5a0e3ad6 3#include <linux/slab.h>
e467cde2
RR
4#include <linux/blkdev.h>
5#include <linux/hdreg.h>
0c8d44f2 6#include <linux/module.h>
4678d6f9 7#include <linux/mutex.h>
e467cde2
RR
8#include <linux/virtio.h>
9#include <linux/virtio_blk.h>
3d1266c7 10#include <linux/scatterlist.h>
7a7c924c 11#include <linux/string_helpers.h>
6917f83f 12#include <scsi/scsi_cmnd.h>
5087a50e 13#include <linux/idr.h>
3d1266c7 14
4f3bf19c 15#define PART_BITS 4
e467cde2 16
a98755c5
AH
17static bool use_bio;
18module_param(use_bio, bool, S_IRUGO);
19
5087a50e
MT
20static int major;
21static DEFINE_IDA(vd_index_ida);
22
7a7c924c 23struct workqueue_struct *virtblk_wq;
4f3bf19c 24
e467cde2
RR
25struct virtio_blk
26{
e467cde2
RR
27 struct virtio_device *vdev;
28 struct virtqueue *vq;
a98755c5 29 wait_queue_head_t queue_wait;
e467cde2
RR
30
31 /* The disk structure for the kernel. */
32 struct gendisk *disk;
33
e467cde2
RR
34 mempool_t *pool;
35
7a7c924c
CH
36 /* Process context for config space updates */
37 struct work_struct config_work;
38
4678d6f9
MT
39 /* Lock for config space updates */
40 struct mutex config_lock;
41
42 /* enable config space updates */
43 bool config_enable;
44
0864b79a
RR
45 /* What host tells us, plus 2 for header & tailer. */
46 unsigned int sg_elems;
47
5087a50e
MT
48 /* Ida index - used to track minor number allocations. */
49 int index;
50
e467cde2 51 /* Scatterlist: can be too big for stack. */
0864b79a 52 struct scatterlist sg[/*sg_elems*/];
e467cde2
RR
53};
54
55struct virtblk_req
56{
e467cde2 57 struct request *req;
a98755c5 58 struct bio *bio;
e467cde2 59 struct virtio_blk_outhdr out_hdr;
1cde26f9 60 struct virtio_scsi_inhdr in_hdr;
c85a1f91
AH
61 struct work_struct work;
62 struct virtio_blk *vblk;
63 int flags;
cb38fa23 64 u8 status;
a98755c5 65 struct scatterlist sg[];
e467cde2
RR
66};
67
c85a1f91
AH
/* Bits stored in virtblk_req.flags for the bio path. */
enum {
	VBLK_IS_FLUSH	= 1 << 0,	/* the command currently in flight is a flush */
	VBLK_REQ_FLUSH	= 1 << 1,	/* bio had REQ_FLUSH set */
	VBLK_REQ_DATA	= 1 << 2,	/* bio carries data (bi_size != 0) */
	VBLK_REQ_FUA	= 1 << 3,	/* bio had REQ_FUA set */
};
74
a98755c5
AH
75static inline int virtblk_result(struct virtblk_req *vbr)
76{
77 switch (vbr->status) {
78 case VIRTIO_BLK_S_OK:
79 return 0;
80 case VIRTIO_BLK_S_UNSUPP:
81 return -ENOTTY;
82 default:
83 return -EIO;
84 }
85}
86
c85a1f91
AH
87static inline struct virtblk_req *virtblk_alloc_req(struct virtio_blk *vblk,
88 gfp_t gfp_mask)
89{
90 struct virtblk_req *vbr;
91
92 vbr = mempool_alloc(vblk->pool, gfp_mask);
f22cf8eb
DC
93 if (!vbr)
94 return NULL;
c85a1f91
AH
95
96 vbr->vblk = vblk;
f22cf8eb
DC
97 if (use_bio)
98 sg_init_table(vbr->sg, vblk->sg_elems);
c85a1f91
AH
99
100 return vbr;
101}
102
103static void virtblk_add_buf_wait(struct virtio_blk *vblk,
104 struct virtblk_req *vbr,
105 unsigned long out,
106 unsigned long in)
107{
108 DEFINE_WAIT(wait);
109
110 for (;;) {
111 prepare_to_wait_exclusive(&vblk->queue_wait, &wait,
112 TASK_UNINTERRUPTIBLE);
113
114 spin_lock_irq(vblk->disk->queue->queue_lock);
115 if (virtqueue_add_buf(vblk->vq, vbr->sg, out, in, vbr,
116 GFP_ATOMIC) < 0) {
117 spin_unlock_irq(vblk->disk->queue->queue_lock);
118 io_schedule();
119 } else {
120 virtqueue_kick(vblk->vq);
121 spin_unlock_irq(vblk->disk->queue->queue_lock);
122 break;
123 }
124
125 }
126
127 finish_wait(&vblk->queue_wait, &wait);
128}
129
130static inline void virtblk_add_req(struct virtblk_req *vbr,
131 unsigned int out, unsigned int in)
132{
133 struct virtio_blk *vblk = vbr->vblk;
134
135 spin_lock_irq(vblk->disk->queue->queue_lock);
136 if (unlikely(virtqueue_add_buf(vblk->vq, vbr->sg, out, in, vbr,
137 GFP_ATOMIC) < 0)) {
138 spin_unlock_irq(vblk->disk->queue->queue_lock);
139 virtblk_add_buf_wait(vblk, vbr, out, in);
140 return;
141 }
142 virtqueue_kick(vblk->vq);
143 spin_unlock_irq(vblk->disk->queue->queue_lock);
144}
145
146static int virtblk_bio_send_flush(struct virtblk_req *vbr)
147{
148 unsigned int out = 0, in = 0;
149
150 vbr->flags |= VBLK_IS_FLUSH;
151 vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
152 vbr->out_hdr.sector = 0;
153 vbr->out_hdr.ioprio = 0;
154 sg_set_buf(&vbr->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr));
155 sg_set_buf(&vbr->sg[out + in++], &vbr->status, sizeof(vbr->status));
156
157 virtblk_add_req(vbr, out, in);
158
159 return 0;
160}
161
162static int virtblk_bio_send_data(struct virtblk_req *vbr)
163{
164 struct virtio_blk *vblk = vbr->vblk;
165 unsigned int num, out = 0, in = 0;
166 struct bio *bio = vbr->bio;
167
168 vbr->flags &= ~VBLK_IS_FLUSH;
169 vbr->out_hdr.type = 0;
170 vbr->out_hdr.sector = bio->bi_sector;
171 vbr->out_hdr.ioprio = bio_prio(bio);
172
173 sg_set_buf(&vbr->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr));
174
175 num = blk_bio_map_sg(vblk->disk->queue, bio, vbr->sg + out);
176
177 sg_set_buf(&vbr->sg[num + out + in++], &vbr->status,
178 sizeof(vbr->status));
179
180 if (num) {
181 if (bio->bi_rw & REQ_WRITE) {
182 vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
183 out += num;
184 } else {
185 vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
186 in += num;
187 }
188 }
189
190 virtblk_add_req(vbr, out, in);
191
192 return 0;
193}
194
195static void virtblk_bio_send_data_work(struct work_struct *work)
196{
197 struct virtblk_req *vbr;
198
199 vbr = container_of(work, struct virtblk_req, work);
200
201 virtblk_bio_send_data(vbr);
202}
203
204static void virtblk_bio_send_flush_work(struct work_struct *work)
205{
206 struct virtblk_req *vbr;
207
208 vbr = container_of(work, struct virtblk_req, work);
209
210 virtblk_bio_send_flush(vbr);
211}
212
213static inline void virtblk_request_done(struct virtblk_req *vbr)
a98755c5 214{
c85a1f91 215 struct virtio_blk *vblk = vbr->vblk;
a98755c5
AH
216 struct request *req = vbr->req;
217 int error = virtblk_result(vbr);
218
219 if (req->cmd_type == REQ_TYPE_BLOCK_PC) {
220 req->resid_len = vbr->in_hdr.residual;
221 req->sense_len = vbr->in_hdr.sense_len;
222 req->errors = vbr->in_hdr.errors;
223 } else if (req->cmd_type == REQ_TYPE_SPECIAL) {
224 req->errors = (error != 0);
225 }
226
227 __blk_end_request_all(req, error);
228 mempool_free(vbr, vblk->pool);
229}
230
c85a1f91 231static inline void virtblk_bio_flush_done(struct virtblk_req *vbr)
a98755c5 232{
c85a1f91
AH
233 struct virtio_blk *vblk = vbr->vblk;
234
235 if (vbr->flags & VBLK_REQ_DATA) {
236 /* Send out the actual write data */
237 INIT_WORK(&vbr->work, virtblk_bio_send_data_work);
238 queue_work(virtblk_wq, &vbr->work);
239 } else {
240 bio_endio(vbr->bio, virtblk_result(vbr));
241 mempool_free(vbr, vblk->pool);
242 }
243}
244
245static inline void virtblk_bio_data_done(struct virtblk_req *vbr)
246{
247 struct virtio_blk *vblk = vbr->vblk;
248
249 if (unlikely(vbr->flags & VBLK_REQ_FUA)) {
250 /* Send out a flush before end the bio */
251 vbr->flags &= ~VBLK_REQ_DATA;
252 INIT_WORK(&vbr->work, virtblk_bio_send_flush_work);
253 queue_work(virtblk_wq, &vbr->work);
254 } else {
255 bio_endio(vbr->bio, virtblk_result(vbr));
256 mempool_free(vbr, vblk->pool);
257 }
258}
259
260static inline void virtblk_bio_done(struct virtblk_req *vbr)
261{
262 if (unlikely(vbr->flags & VBLK_IS_FLUSH))
263 virtblk_bio_flush_done(vbr);
264 else
265 virtblk_bio_data_done(vbr);
a98755c5
AH
266}
267
268static void virtblk_done(struct virtqueue *vq)
e467cde2
RR
269{
270 struct virtio_blk *vblk = vq->vdev->priv;
c85a1f91 271 bool bio_done = false, req_done = false;
e467cde2 272 struct virtblk_req *vbr;
e467cde2 273 unsigned long flags;
a98755c5 274 unsigned int len;
e467cde2 275
2c95a329 276 spin_lock_irqsave(vblk->disk->queue->queue_lock, flags);
bb811108
AH
277 do {
278 virtqueue_disable_cb(vq);
279 while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
280 if (vbr->bio) {
281 virtblk_bio_done(vbr);
282 bio_done = true;
283 } else {
284 virtblk_request_done(vbr);
285 req_done = true;
286 }
33659ebb 287 }
bb811108 288 } while (!virtqueue_enable_cb(vq));
e467cde2 289 /* In case queue is stopped waiting for more buffers. */
a98755c5
AH
290 if (req_done)
291 blk_start_queue(vblk->disk->queue);
2c95a329 292 spin_unlock_irqrestore(vblk->disk->queue->queue_lock, flags);
a98755c5
AH
293
294 if (bio_done)
295 wake_up(&vblk->queue_wait);
296}
297
e467cde2
RR
298static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
299 struct request *req)
300{
1cde26f9 301 unsigned long num, out = 0, in = 0;
e467cde2
RR
302 struct virtblk_req *vbr;
303
a98755c5 304 vbr = virtblk_alloc_req(vblk, GFP_ATOMIC);
e467cde2
RR
305 if (!vbr)
306 /* When another request finishes we'll try again. */
307 return false;
308
309 vbr->req = req;
a98755c5 310 vbr->bio = NULL;
dd40e456
FT
311 if (req->cmd_flags & REQ_FLUSH) {
312 vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
4cb2ea28 313 vbr->out_hdr.sector = 0;
314 vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
dd40e456
FT
315 } else {
316 switch (req->cmd_type) {
317 case REQ_TYPE_FS:
318 vbr->out_hdr.type = 0;
319 vbr->out_hdr.sector = blk_rq_pos(vbr->req);
320 vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
321 break;
322 case REQ_TYPE_BLOCK_PC:
323 vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD;
f1b0ef06
CH
324 vbr->out_hdr.sector = 0;
325 vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
326 break;
dd40e456
FT
327 case REQ_TYPE_SPECIAL:
328 vbr->out_hdr.type = VIRTIO_BLK_T_GET_ID;
329 vbr->out_hdr.sector = 0;
330 vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
331 break;
332 default:
333 /* We don't put anything else in the queue. */
334 BUG();
f1b0ef06 335 }
e467cde2
RR
336 }
337
1cde26f9 338 sg_set_buf(&vblk->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr));
e467cde2 339
1cde26f9
HR
340 /*
341 * If this is a packet command we need a couple of additional headers.
342 * Behind the normal outhdr we put a segment with the scsi command
343 * block, and before the normal inhdr we put the sense data and the
344 * inhdr with additional status information before the normal inhdr.
345 */
33659ebb 346 if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC)
1cde26f9
HR
347 sg_set_buf(&vblk->sg[out++], vbr->req->cmd, vbr->req->cmd_len);
348
349 num = blk_rq_map_sg(q, vbr->req, vblk->sg + out);
350
33659ebb 351 if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC) {
6917f83f 352 sg_set_buf(&vblk->sg[num + out + in++], vbr->req->sense, SCSI_SENSE_BUFFERSIZE);
1cde26f9
HR
353 sg_set_buf(&vblk->sg[num + out + in++], &vbr->in_hdr,
354 sizeof(vbr->in_hdr));
355 }
356
357 sg_set_buf(&vblk->sg[num + out + in++], &vbr->status,
358 sizeof(vbr->status));
359
360 if (num) {
361 if (rq_data_dir(vbr->req) == WRITE) {
362 vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
363 out += num;
364 } else {
365 vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
366 in += num;
367 }
e467cde2
RR
368 }
369
a98755c5
AH
370 if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr,
371 GFP_ATOMIC) < 0) {
e467cde2
RR
372 mempool_free(vbr, vblk->pool);
373 return false;
374 }
375
e467cde2
RR
376 return true;
377}
378
a98755c5 379static void virtblk_request(struct request_queue *q)
e467cde2 380{
6c3b46f7 381 struct virtio_blk *vblk = q->queuedata;
e467cde2
RR
382 struct request *req;
383 unsigned int issued = 0;
384
9934c8c0 385 while ((req = blk_peek_request(q)) != NULL) {
0864b79a 386 BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
e467cde2
RR
387
388 /* If this request fails, stop queue and wait for something to
389 finish to restart it. */
390 if (!do_req(q, vblk, req)) {
391 blk_stop_queue(q);
392 break;
393 }
9934c8c0 394 blk_start_request(req);
e467cde2
RR
395 issued++;
396 }
397
398 if (issued)
09ec6b69 399 virtqueue_kick(vblk->vq);
e467cde2
RR
400}
401
a98755c5
AH
402static void virtblk_make_request(struct request_queue *q, struct bio *bio)
403{
404 struct virtio_blk *vblk = q->queuedata;
a98755c5
AH
405 struct virtblk_req *vbr;
406
407 BUG_ON(bio->bi_phys_segments + 2 > vblk->sg_elems);
a98755c5
AH
408
409 vbr = virtblk_alloc_req(vblk, GFP_NOIO);
410 if (!vbr) {
411 bio_endio(bio, -ENOMEM);
412 return;
413 }
414
415 vbr->bio = bio;
c85a1f91
AH
416 vbr->flags = 0;
417 if (bio->bi_rw & REQ_FLUSH)
418 vbr->flags |= VBLK_REQ_FLUSH;
419 if (bio->bi_rw & REQ_FUA)
420 vbr->flags |= VBLK_REQ_FUA;
421 if (bio->bi_size)
422 vbr->flags |= VBLK_REQ_DATA;
423
424 if (unlikely(vbr->flags & VBLK_REQ_FLUSH))
425 virtblk_bio_send_flush(vbr);
426 else
427 virtblk_bio_send_data(vbr);
a98755c5
AH
428}
429
4cb2ea28 430/* return id (s/n) string for *disk to *id_str
431 */
432static int virtblk_get_id(struct gendisk *disk, char *id_str)
433{
434 struct virtio_blk *vblk = disk->private_data;
435 struct request *req;
436 struct bio *bio;
e4c4776d 437 int err;
4cb2ea28 438
439 bio = bio_map_kern(vblk->disk->queue, id_str, VIRTIO_BLK_ID_BYTES,
440 GFP_KERNEL);
441 if (IS_ERR(bio))
442 return PTR_ERR(bio);
443
444 req = blk_make_request(vblk->disk->queue, bio, GFP_KERNEL);
445 if (IS_ERR(req)) {
446 bio_put(bio);
447 return PTR_ERR(req);
448 }
449
450 req->cmd_type = REQ_TYPE_SPECIAL;
e4c4776d
MS
451 err = blk_execute_rq(vblk->disk->queue, vblk->disk, req, false);
452 blk_put_request(req);
453
454 return err;
4cb2ea28 455}
456
fe5a50a1
CH
457static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
458 unsigned int cmd, unsigned long data)
e467cde2 459{
1cde26f9
HR
460 struct gendisk *disk = bdev->bd_disk;
461 struct virtio_blk *vblk = disk->private_data;
462
463 /*
464 * Only allow the generic SCSI ioctls if the host can support it.
465 */
466 if (!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI))
d9ecdea7 467 return -ENOTTY;
1cde26f9 468
577ebb37
PB
469 return scsi_cmd_blk_ioctl(bdev, mode, cmd,
470 (void __user *)data);
e467cde2
RR
471}
472
135da0b0
CB
473/* We provide getgeo only to please some old bootloader/partitioning tools */
474static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
475{
48e4043d
RH
476 struct virtio_blk *vblk = bd->bd_disk->private_data;
477 struct virtio_blk_geometry vgeo;
478 int err;
479
480 /* see if the host passed in geometry config */
481 err = virtio_config_val(vblk->vdev, VIRTIO_BLK_F_GEOMETRY,
482 offsetof(struct virtio_blk_config, geometry),
483 &vgeo);
484
485 if (!err) {
486 geo->heads = vgeo.heads;
487 geo->sectors = vgeo.sectors;
488 geo->cylinders = vgeo.cylinders;
489 } else {
490 /* some standard values, similar to sd */
491 geo->heads = 1 << 6;
492 geo->sectors = 1 << 5;
493 geo->cylinders = get_capacity(bd->bd_disk) >> 11;
494 }
135da0b0
CB
495 return 0;
496}
497
83d5cde4 498static const struct block_device_operations virtblk_fops = {
8a6cfeb6 499 .ioctl = virtblk_ioctl,
135da0b0
CB
500 .owner = THIS_MODULE,
501 .getgeo = virtblk_getgeo,
e467cde2
RR
502};
503
d50ed907
CB
504static int index_to_minor(int index)
505{
506 return index << PART_BITS;
507}
508
5087a50e
MT
509static int minor_to_index(int minor)
510{
511 return minor >> PART_BITS;
512}
513
a5eb9e4f
RH
514static ssize_t virtblk_serial_show(struct device *dev,
515 struct device_attribute *attr, char *buf)
516{
517 struct gendisk *disk = dev_to_disk(dev);
518 int err;
519
520 /* sysfs gives us a PAGE_SIZE buffer */
521 BUILD_BUG_ON(PAGE_SIZE < VIRTIO_BLK_ID_BYTES);
522
523 buf[VIRTIO_BLK_ID_BYTES] = '\0';
524 err = virtblk_get_id(disk, buf);
525 if (!err)
526 return strlen(buf);
527
528 if (err == -EIO) /* Unsupported? Make it empty. */
529 return 0;
530
531 return err;
532}
533DEVICE_ATTR(serial, S_IRUGO, virtblk_serial_show, NULL);
534
7a7c924c
CH
535static void virtblk_config_changed_work(struct work_struct *work)
536{
537 struct virtio_blk *vblk =
538 container_of(work, struct virtio_blk, config_work);
539 struct virtio_device *vdev = vblk->vdev;
540 struct request_queue *q = vblk->disk->queue;
541 char cap_str_2[10], cap_str_10[10];
9d9598b8 542 char *envp[] = { "RESIZE=1", NULL };
7a7c924c
CH
543 u64 capacity, size;
544
4678d6f9
MT
545 mutex_lock(&vblk->config_lock);
546 if (!vblk->config_enable)
547 goto done;
548
7a7c924c
CH
549 /* Host must always specify the capacity. */
550 vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity),
551 &capacity, sizeof(capacity));
552
553 /* If capacity is too big, truncate with warning. */
554 if ((sector_t)capacity != capacity) {
555 dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
556 (unsigned long long)capacity);
557 capacity = (sector_t)-1;
558 }
559
560 size = capacity * queue_logical_block_size(q);
561 string_get_size(size, STRING_UNITS_2, cap_str_2, sizeof(cap_str_2));
562 string_get_size(size, STRING_UNITS_10, cap_str_10, sizeof(cap_str_10));
563
564 dev_notice(&vdev->dev,
565 "new size: %llu %d-byte logical blocks (%s/%s)\n",
566 (unsigned long long)capacity,
567 queue_logical_block_size(q),
568 cap_str_10, cap_str_2);
569
570 set_capacity(vblk->disk, capacity);
e9986f30 571 revalidate_disk(vblk->disk);
9d9598b8 572 kobject_uevent_env(&disk_to_dev(vblk->disk)->kobj, KOBJ_CHANGE, envp);
4678d6f9
MT
573done:
574 mutex_unlock(&vblk->config_lock);
7a7c924c
CH
575}
576
577static void virtblk_config_changed(struct virtio_device *vdev)
578{
579 struct virtio_blk *vblk = vdev->priv;
580
581 queue_work(virtblk_wq, &vblk->config_work);
582}
583
6abd6e5a
AS
584static int init_vq(struct virtio_blk *vblk)
585{
586 int err = 0;
587
588 /* We expect one virtqueue, for output. */
a98755c5 589 vblk->vq = virtio_find_single_vq(vblk->vdev, virtblk_done, "requests");
6abd6e5a
AS
590 if (IS_ERR(vblk->vq))
591 err = PTR_ERR(vblk->vq);
592
593 return err;
594}
595
c0aa3e09
RM
/*
 * Legacy naming scheme used for virtio devices.  We are stuck with it for
 * virtio blk but don't ever use it for any new driver.
 *
 * Writes @prefix followed by a letter suffix for @index into @buf:
 * 0 -> "a", 25 -> "z", 26 -> "aa", 27 -> "ab", ... 701 -> "zz", 702 -> "aaa".
 *
 * @prefix: NUL-terminated name prefix, e.g. "vd"
 * @index:  non-negative device index
 * @buf:    destination buffer
 * @buflen: size of @buf in bytes, including room for the terminating NUL
 *
 * Returns 0 on success, or -EINVAL if @index is negative or the name does
 * not fit in @buflen bytes.  On -EINVAL the contents of @buf are
 * unspecified.
 */
static int virtblk_name_format(char *prefix, int index, char *buf, int buflen)
{
	const int base = 'z' - 'a' + 1;
	const size_t prefix_len = strlen(prefix);
	char *begin, *end, *p;

	/*
	 * The shortest possible name is the prefix, one letter and the NUL.
	 * Without this check a buffer no longer than the prefix would let the
	 * loop below step over @begin and the final memmove() overrun @buf.
	 */
	if (index < 0 || buflen <= 0 || (size_t)buflen < prefix_len + 2)
		return -EINVAL;

	begin = buf + prefix_len;
	end = buf + buflen;

	/* Build the suffix right-to-left at the tail of the buffer. */
	p = end - 1;
	*p = '\0';
	do {
		if (p == begin)
			return -EINVAL;
		*--p = 'a' + (index % base);
		index = (index / base) - 1;
	} while (index >= 0);

	/* Slide the suffix (with its NUL) up against the prefix slot. */
	memmove(begin, p, end - p);
	memcpy(buf, prefix, prefix_len);

	return 0;
}
623
cd5d5038
PB
624static int virtblk_get_cache_mode(struct virtio_device *vdev)
625{
626 u8 writeback;
627 int err;
628
629 err = virtio_config_val(vdev, VIRTIO_BLK_F_CONFIG_WCE,
630 offsetof(struct virtio_blk_config, wce),
631 &writeback);
632 if (err)
633 writeback = virtio_has_feature(vdev, VIRTIO_BLK_F_WCE);
634
635 return writeback;
636}
637
638static void virtblk_update_cache_mode(struct virtio_device *vdev)
639{
640 u8 writeback = virtblk_get_cache_mode(vdev);
641 struct virtio_blk *vblk = vdev->priv;
642
c85a1f91 643 if (writeback)
cd5d5038
PB
644 blk_queue_flush(vblk->disk->queue, REQ_FLUSH);
645 else
646 blk_queue_flush(vblk->disk->queue, 0);
647
648 revalidate_disk(vblk->disk);
649}
650
/* sysfs names for the cache modes, indexed by the write-back flag (0 or 1). */
static const char *const virtblk_cache_types[] = {
	"write through",
	"write back"
};
654
655static ssize_t
656virtblk_cache_type_store(struct device *dev, struct device_attribute *attr,
657 const char *buf, size_t count)
658{
659 struct gendisk *disk = dev_to_disk(dev);
660 struct virtio_blk *vblk = disk->private_data;
661 struct virtio_device *vdev = vblk->vdev;
662 int i;
663 u8 writeback;
664
665 BUG_ON(!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_CONFIG_WCE));
666 for (i = ARRAY_SIZE(virtblk_cache_types); --i >= 0; )
667 if (sysfs_streq(buf, virtblk_cache_types[i]))
668 break;
669
670 if (i < 0)
671 return -EINVAL;
672
673 writeback = i;
674 vdev->config->set(vdev,
675 offsetof(struct virtio_blk_config, wce),
676 &writeback, sizeof(writeback));
677
678 virtblk_update_cache_mode(vdev);
679 return count;
680}
681
682static ssize_t
683virtblk_cache_type_show(struct device *dev, struct device_attribute *attr,
684 char *buf)
685{
686 struct gendisk *disk = dev_to_disk(dev);
687 struct virtio_blk *vblk = disk->private_data;
688 u8 writeback = virtblk_get_cache_mode(vblk->vdev);
689
690 BUG_ON(writeback >= ARRAY_SIZE(virtblk_cache_types));
691 return snprintf(buf, 40, "%s\n", virtblk_cache_types[writeback]);
692}
693
694static const struct device_attribute dev_attr_cache_type_ro =
695 __ATTR(cache_type, S_IRUGO,
696 virtblk_cache_type_show, NULL);
697static const struct device_attribute dev_attr_cache_type_rw =
698 __ATTR(cache_type, S_IRUGO|S_IWUSR,
699 virtblk_cache_type_show, virtblk_cache_type_store);
700
8d85fce7 701static int virtblk_probe(struct virtio_device *vdev)
e467cde2
RR
702{
703 struct virtio_blk *vblk;
69740c8b 704 struct request_queue *q;
5087a50e 705 int err, index;
a98755c5
AH
706 int pool_size;
707
e467cde2 708 u64 cap;
69740c8b
CH
709 u32 v, blk_size, sg_elems, opt_io_size;
710 u16 min_io_size;
711 u8 physical_block_exp, alignment_offset;
e467cde2 712
5087a50e
MT
713 err = ida_simple_get(&vd_index_ida, 0, minor_to_index(1 << MINORBITS),
714 GFP_KERNEL);
715 if (err < 0)
716 goto out;
717 index = err;
4f3bf19c 718
0864b79a
RR
719 /* We need to know how many segments before we allocate. */
720 err = virtio_config_val(vdev, VIRTIO_BLK_F_SEG_MAX,
721 offsetof(struct virtio_blk_config, seg_max),
722 &sg_elems);
a5b365a6
CH
723
724 /* We need at least one SG element, whatever they say. */
725 if (err || !sg_elems)
0864b79a
RR
726 sg_elems = 1;
727
728 /* We need an extra sg elements at head and tail. */
729 sg_elems += 2;
730 vdev->priv = vblk = kmalloc(sizeof(*vblk) +
731 sizeof(vblk->sg[0]) * sg_elems, GFP_KERNEL);
e467cde2
RR
732 if (!vblk) {
733 err = -ENOMEM;
5087a50e 734 goto out_free_index;
e467cde2
RR
735 }
736
a98755c5 737 init_waitqueue_head(&vblk->queue_wait);
e467cde2 738 vblk->vdev = vdev;
0864b79a
RR
739 vblk->sg_elems = sg_elems;
740 sg_init_table(vblk->sg, vblk->sg_elems);
4678d6f9 741 mutex_init(&vblk->config_lock);
a98755c5 742
7a7c924c 743 INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
4678d6f9 744 vblk->config_enable = true;
e467cde2 745
6abd6e5a
AS
746 err = init_vq(vblk);
747 if (err)
e467cde2 748 goto out_free_vblk;
e467cde2 749
a98755c5
AH
750 pool_size = sizeof(struct virtblk_req);
751 if (use_bio)
752 pool_size += sizeof(struct scatterlist) * sg_elems;
753 vblk->pool = mempool_create_kmalloc_pool(1, pool_size);
e467cde2
RR
754 if (!vblk->pool) {
755 err = -ENOMEM;
756 goto out_free_vq;
757 }
758
e467cde2 759 /* FIXME: How many partitions? How long is a piece of string? */
4f3bf19c 760 vblk->disk = alloc_disk(1 << PART_BITS);
e467cde2
RR
761 if (!vblk->disk) {
762 err = -ENOMEM;
4f3bf19c 763 goto out_mempool;
e467cde2
RR
764 }
765
a98755c5 766 q = vblk->disk->queue = blk_init_queue(virtblk_request, NULL);
69740c8b 767 if (!q) {
e467cde2
RR
768 err = -ENOMEM;
769 goto out_put_disk;
770 }
771
a98755c5
AH
772 if (use_bio)
773 blk_queue_make_request(q, virtblk_make_request);
69740c8b 774 q->queuedata = vblk;
7d116b62 775
c0aa3e09 776 virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN);
d50ed907 777
e467cde2 778 vblk->disk->major = major;
d50ed907 779 vblk->disk->first_minor = index_to_minor(index);
e467cde2
RR
780 vblk->disk->private_data = vblk;
781 vblk->disk->fops = &virtblk_fops;
c4839346 782 vblk->disk->driverfs_dev = &vdev->dev;
5087a50e 783 vblk->index = index;
4f3bf19c 784
02c42b7a 785 /* configure queue flush support */
cd5d5038 786 virtblk_update_cache_mode(vdev);
e467cde2 787
3ef53609
CB
788 /* If disk is read-only in the host, the guest should obey */
789 if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
790 set_disk_ro(vblk->disk, 1);
791
a586d4f6 792 /* Host must always specify the capacity. */
72e61eb4
RR
793 vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity),
794 &cap, sizeof(cap));
e467cde2
RR
795
796 /* If capacity is too big, truncate with warning. */
797 if ((sector_t)cap != cap) {
798 dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
799 (unsigned long long)cap);
800 cap = (sector_t)-1;
801 }
802 set_capacity(vblk->disk, cap);
803
0864b79a 804 /* We can handle whatever the host told us to handle. */
ee714f2d 805 blk_queue_max_segments(q, vblk->sg_elems-2);
0864b79a 806
4eff3cae 807 /* No need to bounce any requests */
69740c8b 808 blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
4eff3cae 809
4b7f7e20 810 /* No real sector limit. */
ee714f2d 811 blk_queue_max_hw_sectors(q, -1U);
4b7f7e20 812
a586d4f6
RR
813 /* Host can optionally specify maximum segment size and number of
814 * segments. */
815 err = virtio_config_val(vdev, VIRTIO_BLK_F_SIZE_MAX,
816 offsetof(struct virtio_blk_config, size_max),
817 &v);
e467cde2 818 if (!err)
69740c8b 819 blk_queue_max_segment_size(q, v);
4b7f7e20 820 else
69740c8b 821 blk_queue_max_segment_size(q, -1U);
e467cde2 822
066f4d82
CB
823 /* Host can optionally specify the block size of the device */
824 err = virtio_config_val(vdev, VIRTIO_BLK_F_BLK_SIZE,
825 offsetof(struct virtio_blk_config, blk_size),
826 &blk_size);
827 if (!err)
69740c8b
CH
828 blk_queue_logical_block_size(q, blk_size);
829 else
830 blk_size = queue_logical_block_size(q);
831
832 /* Use topology information if available */
833 err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
834 offsetof(struct virtio_blk_config, physical_block_exp),
835 &physical_block_exp);
836 if (!err && physical_block_exp)
837 blk_queue_physical_block_size(q,
838 blk_size * (1 << physical_block_exp));
839
840 err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
841 offsetof(struct virtio_blk_config, alignment_offset),
842 &alignment_offset);
843 if (!err && alignment_offset)
844 blk_queue_alignment_offset(q, blk_size * alignment_offset);
845
846 err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
847 offsetof(struct virtio_blk_config, min_io_size),
848 &min_io_size);
849 if (!err && min_io_size)
850 blk_queue_io_min(q, blk_size * min_io_size);
851
852 err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
853 offsetof(struct virtio_blk_config, opt_io_size),
854 &opt_io_size);
855 if (!err && opt_io_size)
856 blk_queue_io_opt(q, blk_size * opt_io_size);
857
e467cde2 858 add_disk(vblk->disk);
a5eb9e4f
RH
859 err = device_create_file(disk_to_dev(vblk->disk), &dev_attr_serial);
860 if (err)
861 goto out_del_disk;
862
cd5d5038
PB
863 if (virtio_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE))
864 err = device_create_file(disk_to_dev(vblk->disk),
865 &dev_attr_cache_type_rw);
866 else
867 err = device_create_file(disk_to_dev(vblk->disk),
868 &dev_attr_cache_type_ro);
869 if (err)
870 goto out_del_disk;
e467cde2
RR
871 return 0;
872
a5eb9e4f
RH
873out_del_disk:
874 del_gendisk(vblk->disk);
875 blk_cleanup_queue(vblk->disk->queue);
e467cde2
RR
876out_put_disk:
877 put_disk(vblk->disk);
e467cde2
RR
878out_mempool:
879 mempool_destroy(vblk->pool);
880out_free_vq:
d2a7ddda 881 vdev->config->del_vqs(vdev);
e467cde2
RR
882out_free_vblk:
883 kfree(vblk);
5087a50e
MT
884out_free_index:
885 ida_simple_remove(&vd_index_ida, index);
e467cde2
RR
886out:
887 return err;
888}
889
8d85fce7 890static void virtblk_remove(struct virtio_device *vdev)
e467cde2
RR
891{
892 struct virtio_blk *vblk = vdev->priv;
5087a50e 893 int index = vblk->index;
f4953fe6 894 int refc;
e467cde2 895
4678d6f9
MT
896 /* Prevent config work handler from accessing the device. */
897 mutex_lock(&vblk->config_lock);
898 vblk->config_enable = false;
899 mutex_unlock(&vblk->config_lock);
7a7c924c 900
02e2b124 901 del_gendisk(vblk->disk);
483001c7 902 blk_cleanup_queue(vblk->disk->queue);
02e2b124 903
6e5aa7ef
RR
904 /* Stop all the virtqueues. */
905 vdev->config->reset(vdev);
906
4678d6f9
MT
907 flush_work(&vblk->config_work);
908
f4953fe6 909 refc = atomic_read(&disk_to_dev(vblk->disk)->kobj.kref.refcount);
e467cde2 910 put_disk(vblk->disk);
e467cde2 911 mempool_destroy(vblk->pool);
d2a7ddda 912 vdev->config->del_vqs(vdev);
e467cde2 913 kfree(vblk);
f4953fe6
AG
914
915 /* Only free device id if we don't have any users */
916 if (refc == 1)
917 ida_simple_remove(&vd_index_ida, index);
e467cde2
RR
918}
919
f8fb5bc2
AS
920#ifdef CONFIG_PM
921static int virtblk_freeze(struct virtio_device *vdev)
922{
923 struct virtio_blk *vblk = vdev->priv;
924
925 /* Ensure we don't receive any more interrupts */
926 vdev->config->reset(vdev);
927
928 /* Prevent config work handler from accessing the device. */
929 mutex_lock(&vblk->config_lock);
930 vblk->config_enable = false;
931 mutex_unlock(&vblk->config_lock);
932
933 flush_work(&vblk->config_work);
934
935 spin_lock_irq(vblk->disk->queue->queue_lock);
936 blk_stop_queue(vblk->disk->queue);
937 spin_unlock_irq(vblk->disk->queue->queue_lock);
938 blk_sync_queue(vblk->disk->queue);
939
940 vdev->config->del_vqs(vdev);
941 return 0;
942}
943
944static int virtblk_restore(struct virtio_device *vdev)
945{
946 struct virtio_blk *vblk = vdev->priv;
947 int ret;
948
949 vblk->config_enable = true;
950 ret = init_vq(vdev->priv);
951 if (!ret) {
952 spin_lock_irq(vblk->disk->queue->queue_lock);
953 blk_start_queue(vblk->disk->queue);
954 spin_unlock_irq(vblk->disk->queue->queue_lock);
955 }
956 return ret;
957}
958#endif
959
47483e25 960static const struct virtio_device_id id_table[] = {
e467cde2
RR
961 { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
962 { 0 },
963};
964
c45a6816 965static unsigned int features[] = {
02c42b7a
TH
966 VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
967 VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI,
cd5d5038 968 VIRTIO_BLK_F_WCE, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE
c45a6816
RR
969};
970
8d85fce7 971static struct virtio_driver virtio_blk = {
7a7c924c
CH
972 .feature_table = features,
973 .feature_table_size = ARRAY_SIZE(features),
974 .driver.name = KBUILD_MODNAME,
975 .driver.owner = THIS_MODULE,
976 .id_table = id_table,
977 .probe = virtblk_probe,
8d85fce7 978 .remove = virtblk_remove,
7a7c924c 979 .config_changed = virtblk_config_changed,
f8fb5bc2
AS
980#ifdef CONFIG_PM
981 .freeze = virtblk_freeze,
982 .restore = virtblk_restore,
983#endif
e467cde2
RR
984};
985
986static int __init init(void)
987{
7a7c924c
CH
988 int error;
989
990 virtblk_wq = alloc_workqueue("virtio-blk", 0, 0);
991 if (!virtblk_wq)
992 return -ENOMEM;
993
4f3bf19c 994 major = register_blkdev(0, "virtblk");
7a7c924c
CH
995 if (major < 0) {
996 error = major;
997 goto out_destroy_workqueue;
998 }
999
1000 error = register_virtio_driver(&virtio_blk);
1001 if (error)
1002 goto out_unregister_blkdev;
1003 return 0;
1004
1005out_unregister_blkdev:
1006 unregister_blkdev(major, "virtblk");
1007out_destroy_workqueue:
1008 destroy_workqueue(virtblk_wq);
1009 return error;
e467cde2
RR
1010}
1011
1012static void __exit fini(void)
1013{
4f3bf19c 1014 unregister_blkdev(major, "virtblk");
e467cde2 1015 unregister_virtio_driver(&virtio_blk);
7a7c924c 1016 destroy_workqueue(virtblk_wq);
e467cde2
RR
1017}
1018module_init(init);
1019module_exit(fini);
1020
1021MODULE_DEVICE_TABLE(virtio, id_table);
1022MODULE_DESCRIPTION("Virtio block driver");
1023MODULE_LICENSE("GPL");