drivers/block/null_blk.c

   1 #include <linux/module.h>
   2
   3 #include <linux/moduleparam.h>
   4 #include <linux/sched.h>
   5 #include <linux/fs.h>
   6 #include <linux/blkdev.h>
   7 #include <linux/init.h>
   8 #include <linux/slab.h>
   9 #include <linux/blk-mq.h>
  10 #include <linux/hrtimer.h>
  11
  12 struct nullb_cmd {
  13         struct list_head list;
  14         struct llist_node ll_list;
  15         struct call_single_data csd;
  16         struct request *rq;
  17         struct bio *bio;
  18         unsigned int tag;
  19         struct nullb_queue *nq;
  20 };
  21
  22 struct nullb_queue {
  23         unsigned long *tag_map;
  24         wait_queue_head_t wait;
  25         unsigned int queue_depth;
  26
  27         struct nullb_cmd *cmds;
  28 };
  29
  30 struct nullb {
  31         struct list_head list;
  32         unsigned int index;
  33         struct request_queue *q;
  34         struct gendisk *disk;
  35         struct blk_mq_tag_set tag_set;
  36         struct hrtimer timer;
  37         unsigned int queue_depth;
  38         spinlock_t lock;
  39
  40         struct nullb_queue *queues;
  41         unsigned int nr_queues;
  42 };
  43
  44 static LIST_HEAD(nullb_list);
  45 static struct mutex lock;
  46 static int null_major;
  47 static int nullb_indexes;
  48
  49 struct completion_queue {
  50         struct llist_head list;
  51         struct hrtimer timer;
  52 };
  53
  54 /*
  55  * These are per-cpu for now, they will need to be configured by the
  56  * complete_queues parameter and appropriately mapped.
  57  */
  58 static DEFINE_PER_CPU(struct completion_queue, completion_queues);
  59
  /* Values of the irqmode module parameter: how a command is completed. */
  enum {
          NULL_IRQ_NONE           = 0,    /* complete inline, at submission */
          NULL_IRQ_SOFTIRQ        = 1,    /* complete through the block layer completion path */
          NULL_IRQ_TIMER          = 2,    /* complete from an hrtimer after completion_nsec */
  };
  65
  /* Values of the queue_mode module parameter: block layer interface used. */
  enum {
          NULL_Q_BIO              = 0,    /* make_request based (null_queue_bio) */
          NULL_Q_RQ               = 1,    /* request_fn based (null_request_fn) */
          NULL_Q_MQ               = 2,    /* blk-mq based (null_mq_ops) */
  };
  71
  72 static int submit_queues;
  73 module_param(submit_queues, int, S_IRUGO);
  74 MODULE_PARM_DESC(submit_queues, "Number of submission queues");
  75
  76 static int home_node = NUMA_NO_NODE;
  77 module_param(home_node, int, S_IRUGO);
  78 MODULE_PARM_DESC(home_node, "Home node for the device");
  79
  80 static int queue_mode = NULL_Q_MQ;
  81 module_param(queue_mode, int, S_IRUGO);
  82 MODULE_PARM_DESC(queue_mode, "Block interface to use (0=bio,1=rq,2=multiqueue)");
  83
  84 static int gb = 250;
  85 module_param(gb, int, S_IRUGO);
  86 MODULE_PARM_DESC(gb, "Size in GB");
  87
  88 static int bs = 512;
  89 module_param(bs, int, S_IRUGO);
  90 MODULE_PARM_DESC(bs, "Block size (in bytes)");
  91
  92 static int nr_devices = 2;
  93 module_param(nr_devices, int, S_IRUGO);
  94 MODULE_PARM_DESC(nr_devices, "Number of devices to register");
  95
  96 static int irqmode = NULL_IRQ_SOFTIRQ;
  97 module_param(irqmode, int, S_IRUGO);
  98 MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, 2-timer");
  99
 100 static int completion_nsec = 10000;
 101 module_param(completion_nsec, int, S_IRUGO);
 102 MODULE_PARM_DESC(completion_nsec, "Time in ns to complete a request in hardware. Default: 10,000ns");
 103
 104 static int hw_queue_depth = 64;
 105 module_param(hw_queue_depth, int, S_IRUGO);
 106 MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 64");
 107
 108 static bool use_per_node_hctx = false;
 109 module_param(use_per_node_hctx, bool, S_IRUGO);
 110 MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false");
 111
 112 static void put_tag(struct nullb_queue *nq, unsigned int tag)
 113 {
 114         clear_bit_unlock(tag, nq->tag_map);
 115
 116         if (waitqueue_active(&nq->wait))
 117                 wake_up(&nq->wait);
 118 }
 119
 120 static unsigned int get_tag(struct nullb_queue *nq)
 121 {
 122         unsigned int tag;
 123
 124         do {
 125                 tag = find_first_zero_bit(nq->tag_map, nq->queue_depth);
 126                 if (tag >= nq->queue_depth)
 127                         return -1U;
 128         } while (test_and_set_bit_lock(tag, nq->tag_map));
 129
 130         return tag;
 131 }
 132
 133 static void free_cmd(struct nullb_cmd *cmd)
 134 {
 135         put_tag(cmd->nq, cmd->tag);
 136 }
 137
 138 static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq)
 139 {
 140         struct nullb_cmd *cmd;
 141         unsigned int tag;
 142
 143         tag = get_tag(nq);
 144         if (tag != -1U) {
 145                 cmd = &nq->cmds[tag];
 146                 cmd->tag = tag;
 147                 cmd->nq = nq;
 148                 return cmd;
 149         }
 150
 151         return NULL;
 152 }
 153
 154 static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, int can_wait)
 155 {
 156         struct nullb_cmd *cmd;
 157         DEFINE_WAIT(wait);
 158
 159         cmd = __alloc_cmd(nq);
 160         if (cmd || !can_wait)
 161                 return cmd;
 162
 163         do {
 164                 prepare_to_wait(&nq->wait, &wait, TASK_UNINTERRUPTIBLE);
 165                 cmd = __alloc_cmd(nq);
 166                 if (cmd)
 167                         break;
 168
 169                 io_schedule();
 170         } while (1);
 171
 172         finish_wait(&nq->wait, &wait);
 173         return cmd;
 174 }
 175
 176 static void end_cmd(struct nullb_cmd *cmd)
 177 {
 178         switch (queue_mode)  {
 179         case NULL_Q_MQ:
 180                 blk_mq_end_io(cmd->rq, 0);
 181                 return;
 182         case NULL_Q_RQ:
 183                 INIT_LIST_HEAD(&cmd->rq->queuelist);
 184                 blk_end_request_all(cmd->rq, 0);
 185                 break;
 186         case NULL_Q_BIO:
 187                 bio_endio(cmd->bio, 0);
 188                 break;
 189         }
 190
 191         free_cmd(cmd);
 192 }
 193
 194 static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
 195 {
 196         struct completion_queue *cq;
 197         struct llist_node *entry;
 198         struct nullb_cmd *cmd;
 199
 200         cq = &per_cpu(completion_queues, smp_processor_id());
 201
 202         while ((entry = llist_del_all(&cq->list)) != NULL) {
 203                 entry = llist_reverse_order(entry);
 204                 do {
 205                         cmd = container_of(entry, struct nullb_cmd, ll_list);
 206                         entry = entry->next;
 207                         end_cmd(cmd);
 208                 } while (entry);
 209         }
 210
 211         return HRTIMER_NORESTART;
 212 }
 213
 214 static void null_cmd_end_timer(struct nullb_cmd *cmd)
 215 {
 216         struct completion_queue *cq = &per_cpu(completion_queues, get_cpu());
 217
 218         cmd->ll_list.next = NULL;
 219         if (llist_add(&cmd->ll_list, &cq->list)) {
 220                 ktime_t kt = ktime_set(0, completion_nsec);
 221
 222                 hrtimer_start(&cq->timer, kt, HRTIMER_MODE_REL);
 223         }
 224
 225         put_cpu();
 226 }
 227
 228 static void null_softirq_done_fn(struct request *rq)
 229 {
 230         if (queue_mode == NULL_Q_MQ)
 231                 end_cmd(blk_mq_rq_to_pdu(rq));
 232         else
 233                 end_cmd(rq->special);
 234 }
 235
 236 static inline void null_handle_cmd(struct nullb_cmd *cmd)
 237 {
 238         /* Complete IO by inline, softirq or timer */
 239         switch (irqmode) {
 240         case NULL_IRQ_SOFTIRQ:
 241                 switch (queue_mode)  {
 242                 case NULL_Q_MQ:
 243                         blk_mq_complete_request(cmd->rq);
 244                         break;
 245                 case NULL_Q_RQ:
 246                         blk_complete_request(cmd->rq);
 247                         break;
 248                 case NULL_Q_BIO:
 249                         /*
 250                          * XXX: no proper submitting cpu information available.
 251                          */
 252                         end_cmd(cmd);
 253                         break;
 254                 }
 255                 break;
 256         case NULL_IRQ_NONE:
 257                 end_cmd(cmd);
 258                 break;
 259         case NULL_IRQ_TIMER:
 260                 null_cmd_end_timer(cmd);
 261                 break;
 262         }
 263 }
 264
 265 static struct nullb_queue *nullb_to_queue(struct nullb *nullb)
 266 {
 267         int index = 0;
 268
 269         if (nullb->nr_queues != 1)
 270                 index = raw_smp_processor_id() / ((nr_cpu_ids + nullb->nr_queues - 1) / nullb->nr_queues);
 271
 272         return &nullb->queues[index];
 273 }
 274
 275 static void null_queue_bio(struct request_queue *q, struct bio *bio)
 276 {
 277         struct nullb *nullb = q->queuedata;
 278         struct nullb_queue *nq = nullb_to_queue(nullb);
 279         struct nullb_cmd *cmd;
 280
 281         cmd = alloc_cmd(nq, 1);
 282         cmd->bio = bio;
 283
 284         null_handle_cmd(cmd);
 285 }
 286
 287 static int null_rq_prep_fn(struct request_queue *q, struct request *req)
 288 {
 289         struct nullb *nullb = q->queuedata;
 290         struct nullb_queue *nq = nullb_to_queue(nullb);
 291         struct nullb_cmd *cmd;
 292
 293         cmd = alloc_cmd(nq, 0);
 294         if (cmd) {
 295                 cmd->rq = req;
 296                 req->special = cmd;
 297                 return BLKPREP_OK;
 298         }
 299
 300         return BLKPREP_DEFER;
 301 }
 302
 303 static void null_request_fn(struct request_queue *q)
 304 {
 305         struct request *rq;
 306
 307         while ((rq = blk_fetch_request(q)) != NULL) {
 308                 struct nullb_cmd *cmd = rq->special;
 309
 310                 spin_unlock_irq(q->queue_lock);
 311                 null_handle_cmd(cmd);
 312                 spin_lock_irq(q->queue_lock);
 313         }
 314 }
 315
 316 static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq,
 317                 bool last)
 318 {
 319         struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
 320
 321         cmd->rq = rq;
 322         cmd->nq = hctx->driver_data;
 323
 324         null_handle_cmd(cmd);
 325         return BLK_MQ_RQ_QUEUE_OK;
 326 }
 327
 328 static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
 329 {
 330         BUG_ON(!nullb);
 331         BUG_ON(!nq);
 332
 333         init_waitqueue_head(&nq->wait);
 334         nq->queue_depth = nullb->queue_depth;
 335 }
 336
 337 static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
 338                           unsigned int index)
 339 {
 340         struct nullb *nullb = data;
 341         struct nullb_queue *nq = &nullb->queues[index];
 342
 343         hctx->driver_data = nq;
 344         null_init_queue(nullb, nq);
 345         nullb->nr_queues++;
 346
 347         return 0;
 348 }
 349
 350 static struct blk_mq_ops null_mq_ops = {
 351         .queue_rq       = null_queue_rq,
 352         .map_queue      = blk_mq_map_queue,
 353         .init_hctx      = null_init_hctx,
 354         .complete       = null_softirq_done_fn,
 355 };
 356
 357 static void null_del_dev(struct nullb *nullb)
 358 {
 359         list_del_init(&nullb->list);
 360
 361         del_gendisk(nullb->disk);
 362         blk_cleanup_queue(nullb->q);
 363         if (queue_mode == NULL_Q_MQ)
 364                 blk_mq_free_tag_set(&nullb->tag_set);
 365         put_disk(nullb->disk);
 366         kfree(nullb);
 367 }
 368
 369 static int null_open(struct block_device *bdev, fmode_t mode)
 370 {
 371         return 0;
 372 }
 373
 374 static void null_release(struct gendisk *disk, fmode_t mode)
 375 {
 376 }
 377
 378 static const struct block_device_operations null_fops = {
 379         .owner =        THIS_MODULE,
 380         .open =         null_open,
 381         .release =      null_release,
 382 };
 383
 384 static int setup_commands(struct nullb_queue *nq)
 385 {
 386         struct nullb_cmd *cmd;
 387         int i, tag_size;
 388
 389         nq->cmds = kzalloc(nq->queue_depth * sizeof(*cmd), GFP_KERNEL);
 390         if (!nq->cmds)
 391                 return -ENOMEM;
 392
 393         tag_size = ALIGN(nq->queue_depth, BITS_PER_LONG) / BITS_PER_LONG;
 394         nq->tag_map = kzalloc(tag_size * sizeof(unsigned long), GFP_KERNEL);
 395         if (!nq->tag_map) {
 396                 kfree(nq->cmds);
 397                 return -ENOMEM;
 398         }
 399
 400         for (i = 0; i < nq->queue_depth; i++) {
 401                 cmd = &nq->cmds[i];
 402                 INIT_LIST_HEAD(&cmd->list);
 403                 cmd->ll_list.next = NULL;
 404                 cmd->tag = -1U;
 405         }
 406
 407         return 0;
 408 }
 409
 410 static void cleanup_queue(struct nullb_queue *nq)
 411 {
 412         kfree(nq->tag_map);
 413         kfree(nq->cmds);
 414 }
 415
 416 static void cleanup_queues(struct nullb *nullb)
 417 {
 418         int i;
 419
 420         for (i = 0; i < nullb->nr_queues; i++)
 421                 cleanup_queue(&nullb->queues[i]);
 422
 423         kfree(nullb->queues);
 424 }
 425
 426 static int setup_queues(struct nullb *nullb)
 427 {
 428         nullb->queues = kzalloc(submit_queues * sizeof(struct nullb_queue),
 429                                                                 GFP_KERNEL);
 430         if (!nullb->queues)
 431                 return -ENOMEM;
 432
 433         nullb->nr_queues = 0;
 434         nullb->queue_depth = hw_queue_depth;
 435
 436         return 0;
 437 }
 438
 439 static int init_driver_queues(struct nullb *nullb)
 440 {
 441         struct nullb_queue *nq;
 442         int i, ret = 0;
 443
 444         for (i = 0; i < submit_queues; i++) {
 445                 nq = &nullb->queues[i];
 446
 447                 null_init_queue(nullb, nq);
 448
 449                 ret = setup_commands(nq);
 450                 if (ret)
 451                         goto err_queue;
 452                 nullb->nr_queues++;
 453         }
 454
 455         return 0;
 456 err_queue:
 457         cleanup_queues(nullb);
 458         return ret;
 459 }
 460
 461 static int null_add_dev(void)
 462 {
 463         struct gendisk *disk;
 464         struct nullb *nullb;
 465         sector_t size;
 466         int rv;
 467
 468         nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, home_node);
 469         if (!nullb) {
 470                 rv = -ENOMEM;
 471                 goto out;
 472         }
 473
 474         spin_lock_init(&nullb->lock);
 475
 476         if (queue_mode == NULL_Q_MQ && use_per_node_hctx)
 477                 submit_queues = nr_online_nodes;
 478
 479         rv = setup_queues(nullb);
 480         if (rv)
 481                 goto out_free_nullb;
 482
 483         if (queue_mode == NULL_Q_MQ) {
 484                 nullb->tag_set.ops = &null_mq_ops;
 485                 nullb->tag_set.nr_hw_queues = submit_queues;
 486                 nullb->tag_set.queue_depth = hw_queue_depth;
 487                 nullb->tag_set.numa_node = home_node;
 488                 nullb->tag_set.cmd_size = sizeof(struct nullb_cmd);
 489                 nullb->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
 490                 nullb->tag_set.driver_data = nullb;
 491
 492                 rv = blk_mq_alloc_tag_set(&nullb->tag_set);
 493                 if (rv)
 494                         goto out_cleanup_queues;
 495
 496                 nullb->q = blk_mq_init_queue(&nullb->tag_set);
 497                 if (!nullb->q) {
 498                         rv = -ENOMEM;
 499                         goto out_cleanup_tags;
 500                 }
 501         } else if (queue_mode == NULL_Q_BIO) {
 502                 nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node);
 503                 if (!nullb->q) {
 504                         rv = -ENOMEM;
 505                         goto out_cleanup_queues;
 506                 }
 507                 blk_queue_make_request(nullb->q, null_queue_bio);
 508                 init_driver_queues(nullb);
 509         } else {
 510                 nullb->q = blk_init_queue_node(null_request_fn, &nullb->lock, home_node);
 511                 if (!nullb->q) {
 512                         rv = -ENOMEM;
 513                         goto out_cleanup_queues;
 514                 }
 515                 blk_queue_prep_rq(nullb->q, null_rq_prep_fn);
 516                 blk_queue_softirq_done(nullb->q, null_softirq_done_fn);
 517                 init_driver_queues(nullb);
 518         }
 519
 520         nullb->q->queuedata = nullb;
 521         queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q);
 522
 523         disk = nullb->disk = alloc_disk_node(1, home_node);
 524         if (!disk) {
 525                 rv = -ENOMEM;
 526                 goto out_cleanup_blk_queue;
 527         }
 528
 529         mutex_lock(&lock);
 530         list_add_tail(&nullb->list, &nullb_list);
 531         nullb->index = nullb_indexes++;
 532         mutex_unlock(&lock);
 533
 534         blk_queue_logical_block_size(nullb->q, bs);
 535         blk_queue_physical_block_size(nullb->q, bs);
 536
 537         size = gb * 1024 * 1024 * 1024ULL;
 538         sector_div(size, bs);
 539         set_capacity(disk, size);
 540
 541         disk->flags |= GENHD_FL_EXT_DEVT;
 542         disk->major             = null_major;
 543         disk->first_minor       = nullb->index;
 544         disk->fops              = &null_fops;
 545         disk->private_data      = nullb;
 546         disk->queue             = nullb->q;
 547         sprintf(disk->disk_name, "nullb%d", nullb->index);
 548         add_disk(disk);
 549         return 0;
 550
 551 out_cleanup_blk_queue:
 552         blk_cleanup_queue(nullb->q);
 553 out_cleanup_tags:
 554         if (queue_mode == NULL_Q_MQ)
 555                 blk_mq_free_tag_set(&nullb->tag_set);
 556 out_cleanup_queues:
 557         cleanup_queues(nullb);
 558 out_free_nullb:
 559         kfree(nullb);
 560 out:
 561         return rv;
 562 }
 563
 564 static int __init null_init(void)
 565 {
 566         unsigned int i;
 567
 568         if (bs > PAGE_SIZE) {
 569                 pr_warn("null_blk: invalid block size\n");
 570                 pr_warn("null_blk: defaults block size to %lu\n", PAGE_SIZE);
 571                 bs = PAGE_SIZE;
 572         }
 573
 574         if (queue_mode == NULL_Q_MQ && use_per_node_hctx) {
 575                 if (submit_queues < nr_online_nodes) {
 576                         pr_warn("null_blk: submit_queues param is set to %u.",
 577                                                         nr_online_nodes);
 578                         submit_queues = nr_online_nodes;
 579                 }
 580         } else if (submit_queues > nr_cpu_ids)
 581                 submit_queues = nr_cpu_ids;
 582         else if (!submit_queues)
 583                 submit_queues = 1;
 584
 585         mutex_init(&lock);
 586
 587         /* Initialize a separate list for each CPU for issuing softirqs */
 588         for_each_possible_cpu(i) {
 589                 struct completion_queue *cq = &per_cpu(completion_queues, i);
 590
 591                 init_llist_head(&cq->list);
 592
 593                 if (irqmode != NULL_IRQ_TIMER)
 594                         continue;
 595
 596                 hrtimer_init(&cq->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 597                 cq->timer.function = null_cmd_timer_expired;
 598         }
 599
 600         null_major = register_blkdev(0, "nullb");
 601         if (null_major < 0)
 602                 return null_major;
 603
 604         for (i = 0; i < nr_devices; i++) {
 605                 if (null_add_dev()) {
 606                         unregister_blkdev(null_major, "nullb");
 607                         return -EINVAL;
 608                 }
 609         }
 610
 611         pr_info("null: module loaded\n");
 612         return 0;
 613 }
 614
 615 static void __exit null_exit(void)
 616 {
 617         struct nullb *nullb;
 618
 619         unregister_blkdev(null_major, "nullb");
 620
 621         mutex_lock(&lock);
 622         while (!list_empty(&nullb_list)) {
 623                 nullb = list_entry(nullb_list.next, struct nullb, list);
 624                 null_del_dev(nullb);
 625         }
 626         mutex_unlock(&lock);
 627 }
 628
 629 module_init(null_init);
 630 module_exit(null_exit);
 631
 632 MODULE_AUTHOR("Jens Axboe <jaxboe@fusionio.com>");
 633 MODULE_LICENSE("GPL");