drivers/nvme/target/core.c
1 /*
2 * Common code for the NVMe target.
3 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 */
14 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
15 #include <linux/module.h>
16 #include <linux/random.h>
17 #include <linux/rculist.h>
18
19 #include "nvmet.h"
20
21 static struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
22 static DEFINE_IDA(cntlid_ida);
23
24 /*
25	 * This read/write semaphore is used to synchronize access to configuration
26	 * information on a target system whose change would result in a discovery
27	 * log page information change for at least one host.
28	 * The full list of resources protected by this semaphore is:
29 *
30 * - subsystems list
31 * - per-subsystem allowed hosts list
32 * - allow_any_host subsystem attribute
33 * - nvmet_genctr
34 * - the nvmet_transports array
35 *
36	 * When updating any of those lists/structures, the write lock should be
37	 * held, while readers (populating the discovery log page or checking a
38	 * host-subsystem link) take the read lock to allow concurrent reads.
39 */
40 DECLARE_RWSEM(nvmet_config_sem);
41
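/*
 * Illustrative sketch, not part of this file: writers that change any of the
 * protected resources listed above are expected to follow the pattern below
 * (compare nvmet_register_transport() and the configfs code), while readers
 * take the semaphore with down_read()/up_read().
 */
#if 0	/* example only, nvmet_example_* is a hypothetical helper */
static void nvmet_example_config_update(struct nvmet_subsys *subsys, bool allow)
{
	down_write(&nvmet_config_sem);
	subsys->allow_any_host = allow;	/* a protected subsystem attribute */
	nvmet_genctr++;			/* bump the discovery log generation counter */
	up_write(&nvmet_config_sem);
}
#endif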
42 static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
43 const char *subsysnqn);
44
45 u16 nvmet_copy_to_sgl(struct nvmet_req *req, off_t off, const void *buf,
46 size_t len)
47 {
48 if (sg_pcopy_from_buffer(req->sg, req->sg_cnt, buf, len, off) != len)
49 return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
50 return 0;
51 }
52
53 u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf, size_t len)
54 {
55 if (sg_pcopy_to_buffer(req->sg, req->sg_cnt, buf, len, off) != len)
56 return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
57 return 0;
58 }
59
60 static u32 nvmet_async_event_result(struct nvmet_async_event *aen)
61 {
62 return aen->event_type | (aen->event_info << 8) | (aen->log_page << 16);
63 }
64
65 static void nvmet_async_events_free(struct nvmet_ctrl *ctrl)
66 {
67 struct nvmet_req *req;
68
69 while (1) {
70 mutex_lock(&ctrl->lock);
71 if (!ctrl->nr_async_event_cmds) {
72 mutex_unlock(&ctrl->lock);
73 return;
74 }
75
76 req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
77 mutex_unlock(&ctrl->lock);
78 nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR);
79 }
80 }
81
82 static void nvmet_async_event_work(struct work_struct *work)
83 {
84 struct nvmet_ctrl *ctrl =
85 container_of(work, struct nvmet_ctrl, async_event_work);
86 struct nvmet_async_event *aen;
87 struct nvmet_req *req;
88
89 while (1) {
90 mutex_lock(&ctrl->lock);
91 aen = list_first_entry_or_null(&ctrl->async_events,
92 struct nvmet_async_event, entry);
93 if (!aen || !ctrl->nr_async_event_cmds) {
94 mutex_unlock(&ctrl->lock);
95 return;
96 }
97
98 req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
99 nvmet_set_result(req, nvmet_async_event_result(aen));
100
101 list_del(&aen->entry);
102 kfree(aen);
103
104 mutex_unlock(&ctrl->lock);
105 nvmet_req_complete(req, 0);
106 }
107 }
108
109 static void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
110 u8 event_info, u8 log_page)
111 {
112 struct nvmet_async_event *aen;
113
114 aen = kmalloc(sizeof(*aen), GFP_KERNEL);
115 if (!aen)
116 return;
117
118 aen->event_type = event_type;
119 aen->event_info = event_info;
120 aen->log_page = log_page;
121
122 mutex_lock(&ctrl->lock);
123 list_add_tail(&aen->entry, &ctrl->async_events);
124 mutex_unlock(&ctrl->lock);
125
126 schedule_work(&ctrl->async_event_work);
127 }
128
129 int nvmet_register_transport(struct nvmet_fabrics_ops *ops)
130 {
131 int ret = 0;
132
133 down_write(&nvmet_config_sem);
134 if (nvmet_transports[ops->type])
135 ret = -EINVAL;
136 else
137 nvmet_transports[ops->type] = ops;
138 up_write(&nvmet_config_sem);
139
140 return ret;
141 }
142 EXPORT_SYMBOL_GPL(nvmet_register_transport);
143
144 void nvmet_unregister_transport(struct nvmet_fabrics_ops *ops)
145 {
146 down_write(&nvmet_config_sem);
147 nvmet_transports[ops->type] = NULL;
148 up_write(&nvmet_config_sem);
149 }
150 EXPORT_SYMBOL_GPL(nvmet_unregister_transport);
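/*
 * Illustrative sketch, not part of this file: a fabrics transport module is
 * expected to fill in a struct nvmet_fabrics_ops and register it from its
 * module init (the in-tree loop and RDMA transports do exactly this).  The
 * nvmet_example_* names below are hypothetical.
 */
#if 0	/* example only */
static struct nvmet_fabrics_ops nvmet_example_ops = {
	.owner		= THIS_MODULE,
	.type		= NVMF_TRTYPE_LOOP,	/* hypothetical transport type */
	.add_port	= nvmet_example_add_port,
	.remove_port	= nvmet_example_remove_port,
	.queue_response	= nvmet_example_queue_response,
	.delete_ctrl	= nvmet_example_delete_ctrl,
};

static int __init nvmet_example_init(void)
{
	return nvmet_register_transport(&nvmet_example_ops);
}

static void __exit nvmet_example_exit(void)
{
	nvmet_unregister_transport(&nvmet_example_ops);
}
#endif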
151
152 int nvmet_enable_port(struct nvmet_port *port)
153 {
154 struct nvmet_fabrics_ops *ops;
155 int ret;
156
157 lockdep_assert_held(&nvmet_config_sem);
158
159 ops = nvmet_transports[port->disc_addr.trtype];
160 if (!ops) {
161 up_write(&nvmet_config_sem);
162 request_module("nvmet-transport-%d", port->disc_addr.trtype);
163 down_write(&nvmet_config_sem);
164 ops = nvmet_transports[port->disc_addr.trtype];
165 if (!ops) {
166 pr_err("transport type %d not supported\n",
167 port->disc_addr.trtype);
168 return -EINVAL;
169 }
170 }
171
172 if (!try_module_get(ops->owner))
173 return -EINVAL;
174
175 ret = ops->add_port(port);
176 if (ret) {
177 module_put(ops->owner);
178 return ret;
179 }
180
181 port->enabled = true;
182 return 0;
183 }
184
185 void nvmet_disable_port(struct nvmet_port *port)
186 {
187 struct nvmet_fabrics_ops *ops;
188
189 lockdep_assert_held(&nvmet_config_sem);
190
191 port->enabled = false;
192
193 ops = nvmet_transports[port->disc_addr.trtype];
194 ops->remove_port(port);
195 module_put(ops->owner);
196 }
197
198 static void nvmet_keep_alive_timer(struct work_struct *work)
199 {
200 struct nvmet_ctrl *ctrl = container_of(to_delayed_work(work),
201 struct nvmet_ctrl, ka_work);
202
203 pr_err("ctrl %d keep-alive timer (%d seconds) expired!\n",
204 ctrl->cntlid, ctrl->kato);
205
206 nvmet_ctrl_fatal_error(ctrl);
207 }
208
209 static void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
210 {
211 pr_debug("ctrl %d start keep-alive timer for %d secs\n",
212 ctrl->cntlid, ctrl->kato);
213
214 INIT_DELAYED_WORK(&ctrl->ka_work, nvmet_keep_alive_timer);
215 schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
216 }
217
218 static void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl)
219 {
220 pr_debug("ctrl %d stop keep-alive\n", ctrl->cntlid);
221
222 cancel_delayed_work_sync(&ctrl->ka_work);
223 }
224
225 static struct nvmet_ns *__nvmet_find_namespace(struct nvmet_ctrl *ctrl,
226 __le32 nsid)
227 {
228 struct nvmet_ns *ns;
229
230 list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) {
231 if (ns->nsid == le32_to_cpu(nsid))
232 return ns;
233 }
234
235 return NULL;
236 }
237
238 struct nvmet_ns *nvmet_find_namespace(struct nvmet_ctrl *ctrl, __le32 nsid)
239 {
240 struct nvmet_ns *ns;
241
242 rcu_read_lock();
243 ns = __nvmet_find_namespace(ctrl, nsid);
244 if (ns)
245 percpu_ref_get(&ns->ref);
246 rcu_read_unlock();
247
248 return ns;
249 }
250
251 static void nvmet_destroy_namespace(struct percpu_ref *ref)
252 {
253 struct nvmet_ns *ns = container_of(ref, struct nvmet_ns, ref);
254
255 complete(&ns->disable_done);
256 }
257
258 void nvmet_put_namespace(struct nvmet_ns *ns)
259 {
260 percpu_ref_put(&ns->ref);
261 }
262
263 int nvmet_ns_enable(struct nvmet_ns *ns)
264 {
265 struct nvmet_subsys *subsys = ns->subsys;
266 struct nvmet_ctrl *ctrl;
267 int ret = 0;
268
269 mutex_lock(&subsys->lock);
270 if (ns->enabled)
271 goto out_unlock;
272
273 ns->bdev = blkdev_get_by_path(ns->device_path, FMODE_READ | FMODE_WRITE,
274 NULL);
275 if (IS_ERR(ns->bdev)) {
276 pr_err("failed to open block device %s: (%ld)\n",
277 ns->device_path, PTR_ERR(ns->bdev));
278 ret = PTR_ERR(ns->bdev);
279 ns->bdev = NULL;
280 goto out_unlock;
281 }
282
283 ns->size = i_size_read(ns->bdev->bd_inode);
284 ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));
285
286 ret = percpu_ref_init(&ns->ref, nvmet_destroy_namespace,
287 0, GFP_KERNEL);
288 if (ret)
289 goto out_blkdev_put;
290
291 if (ns->nsid > subsys->max_nsid)
292 subsys->max_nsid = ns->nsid;
293
294 /*
295 * The namespaces list needs to be sorted to simplify the implementation
296	 * of the Identify Namespace List subcommand.
297 */
298 if (list_empty(&subsys->namespaces)) {
299 list_add_tail_rcu(&ns->dev_link, &subsys->namespaces);
300 } else {
301 struct nvmet_ns *old;
302
303 list_for_each_entry_rcu(old, &subsys->namespaces, dev_link) {
304 BUG_ON(ns->nsid == old->nsid);
305 if (ns->nsid < old->nsid)
306 break;
307 }
308
309 list_add_tail_rcu(&ns->dev_link, &old->dev_link);
310 }
311
312 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
313 nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, 0, 0);
314
315 ns->enabled = true;
316 ret = 0;
317 out_unlock:
318 mutex_unlock(&subsys->lock);
319 return ret;
320 out_blkdev_put:
321 blkdev_put(ns->bdev, FMODE_WRITE|FMODE_READ);
322 ns->bdev = NULL;
323 goto out_unlock;
324 }
325
326 void nvmet_ns_disable(struct nvmet_ns *ns)
327 {
328 struct nvmet_subsys *subsys = ns->subsys;
329 struct nvmet_ctrl *ctrl;
330
331 mutex_lock(&subsys->lock);
332 if (!ns->enabled)
333 goto out_unlock;
334
335 ns->enabled = false;
336 list_del_rcu(&ns->dev_link);
337 mutex_unlock(&subsys->lock);
338
339 /*
340 * Now that we removed the namespaces from the lookup list, we
341 * can kill the per_cpu ref and wait for any remaining references
342	 * to be dropped, as well as an RCU grace period for anyone only
343	 * using the namespace under rcu_read_lock(). Note that we can't
344 * use call_rcu here as we need to ensure the namespaces have
345 * been fully destroyed before unloading the module.
346 */
347 percpu_ref_kill(&ns->ref);
348 synchronize_rcu();
349 wait_for_completion(&ns->disable_done);
350 percpu_ref_exit(&ns->ref);
351
352 mutex_lock(&subsys->lock);
353 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
354 nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, 0, 0);
355
356 if (ns->bdev)
357 blkdev_put(ns->bdev, FMODE_WRITE|FMODE_READ);
358 out_unlock:
359 mutex_unlock(&subsys->lock);
360 }
361
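/*
 * Note on the lifetime rules above: the lookup side (nvmet_find_namespace())
 * only holds rcu_read_lock() plus a percpu reference, so disabling a
 * namespace must kill the ref, wait for an RCU grace period and wait for the
 * last reference to drop before nvmet_ns_free() may release the memory.
 */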
362 void nvmet_ns_free(struct nvmet_ns *ns)
363 {
364 nvmet_ns_disable(ns);
365
366 kfree(ns->device_path);
367 kfree(ns);
368 }
369
370 struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
371 {
372 struct nvmet_ns *ns;
373
374 ns = kzalloc(sizeof(*ns), GFP_KERNEL);
375 if (!ns)
376 return NULL;
377
378 INIT_LIST_HEAD(&ns->dev_link);
379 init_completion(&ns->disable_done);
380
381 ns->nsid = nsid;
382 ns->subsys = subsys;
383 uuid_gen(&ns->uuid);
384
385 return ns;
386 }
387
388 static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
389 {
390 u32 old_sqhd, new_sqhd;
391 u16 sqhd;
392
393 if (status)
394 nvmet_set_status(req, status);
395
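	/*
	 * Advance the submission queue head for this completion.  Completions
	 * may race on the same queue, so the head is updated lock-free with
	 * cmpxchg(), wrapping around modulo the queue size.
	 */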
396 if (req->sq->size) {
397 do {
398 old_sqhd = req->sq->sqhd;
399 new_sqhd = (old_sqhd + 1) % req->sq->size;
400 } while (cmpxchg(&req->sq->sqhd, old_sqhd, new_sqhd) !=
401 old_sqhd);
402 }
403 sqhd = req->sq->sqhd & 0x0000FFFF;
404 req->rsp->sq_head = cpu_to_le16(sqhd);
405 req->rsp->sq_id = cpu_to_le16(req->sq->qid);
406 req->rsp->command_id = req->cmd->common.command_id;
407
408 if (req->ns)
409 nvmet_put_namespace(req->ns);
410 req->ops->queue_response(req);
411 }
412
413 void nvmet_req_complete(struct nvmet_req *req, u16 status)
414 {
415 __nvmet_req_complete(req, status);
416 percpu_ref_put(&req->sq->ref);
417 }
418 EXPORT_SYMBOL_GPL(nvmet_req_complete);
419
420 void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq,
421 u16 qid, u16 size)
422 {
423 cq->qid = qid;
424 cq->size = size;
425
426 ctrl->cqs[qid] = cq;
427 }
428
429 void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
430 u16 qid, u16 size)
431 {
432 sq->sqhd = 0;
433 sq->qid = qid;
434 sq->size = size;
435
436 ctrl->sqs[qid] = sq;
437 }
438
439 static void nvmet_confirm_sq(struct percpu_ref *ref)
440 {
441 struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);
442
443 complete(&sq->confirm_done);
444 }
445
446 void nvmet_sq_destroy(struct nvmet_sq *sq)
447 {
448 /*
449 * If this is the admin queue, complete all AERs so that our
450 * queue doesn't have outstanding requests on it.
451 */
452 if (sq->ctrl && sq->ctrl->sqs && sq->ctrl->sqs[0] == sq)
453 nvmet_async_events_free(sq->ctrl);
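	/*
	 * Two waits below: confirm_done is completed by nvmet_confirm_sq()
	 * once the ref has been switched to atomic mode and no new request
	 * can take a live reference, and free_done is completed by
	 * nvmet_sq_free() when the last outstanding request drops its ref.
	 */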
454 percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq);
455 wait_for_completion(&sq->confirm_done);
456 wait_for_completion(&sq->free_done);
457 percpu_ref_exit(&sq->ref);
458
459 if (sq->ctrl) {
460 nvmet_ctrl_put(sq->ctrl);
461 sq->ctrl = NULL; /* allows reusing the queue later */
462 }
463 }
464 EXPORT_SYMBOL_GPL(nvmet_sq_destroy);
465
466 static void nvmet_sq_free(struct percpu_ref *ref)
467 {
468 struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);
469
470 complete(&sq->free_done);
471 }
472
473 int nvmet_sq_init(struct nvmet_sq *sq)
474 {
475 int ret;
476
477 ret = percpu_ref_init(&sq->ref, nvmet_sq_free, 0, GFP_KERNEL);
478 if (ret) {
479 pr_err("percpu_ref init failed!\n");
480 return ret;
481 }
482 init_completion(&sq->free_done);
483 init_completion(&sq->confirm_done);
484
485 return 0;
486 }
487 EXPORT_SYMBOL_GPL(nvmet_sq_init);
488
489 bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
490 struct nvmet_sq *sq, struct nvmet_fabrics_ops *ops)
491 {
492 u8 flags = req->cmd->common.flags;
493 u16 status;
494
495 req->cq = cq;
496 req->sq = sq;
497 req->ops = ops;
498 req->sg = NULL;
499 req->sg_cnt = 0;
500 req->rsp->status = 0;
501
502 /* no support for fused commands yet */
503 if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) {
504 status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
505 goto fail;
506 }
507
508 /* either variant of SGLs is fine, as we don't support metadata */
509 if (unlikely((flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METABUF &&
510 (flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METASEG)) {
511 status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
512 goto fail;
513 }
514
515 if (unlikely(!req->sq->ctrl))
516		/* will return an error for any non-Connect command: */
517 status = nvmet_parse_connect_cmd(req);
518 else if (likely(req->sq->qid != 0))
519 status = nvmet_parse_io_cmd(req);
520 else if (req->cmd->common.opcode == nvme_fabrics_command)
521 status = nvmet_parse_fabrics_cmd(req);
522 else if (req->sq->ctrl->subsys->type == NVME_NQN_DISC)
523 status = nvmet_parse_discovery_cmd(req);
524 else
525 status = nvmet_parse_admin_cmd(req);
526
527 if (status)
528 goto fail;
529
530 if (unlikely(!percpu_ref_tryget_live(&sq->ref))) {
531 status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
532 goto fail;
533 }
534
535 return true;
536
537 fail:
538 __nvmet_req_complete(req, status);
539 return false;
540 }
541 EXPORT_SYMBOL_GPL(nvmet_req_init);
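/*
 * Illustrative sketch, not part of this file: a transport hands each received
 * capsule to the core roughly as below.  On failure the request has already
 * been completed with an error status; on success the in-tree transports run
 * the handler selected by the parse step via req->execute() (or call
 * nvmet_req_uninit() if they must abort before executing).
 */
#if 0	/* example only, nvmet_example_* names are hypothetical */
static void nvmet_example_handle_cmd(struct nvmet_cq *cq, struct nvmet_sq *sq,
		struct nvmet_req *req)
{
	if (!nvmet_req_init(req, cq, sq, &nvmet_example_ops))
		return;		/* already completed with an error status */

	/* map/transfer data for the command here, then execute it */
	req->execute(req);	/* ends in nvmet_req_complete() */
}
#endif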
542
543 void nvmet_req_uninit(struct nvmet_req *req)
544 {
545 percpu_ref_put(&req->sq->ref);
546 }
547 EXPORT_SYMBOL_GPL(nvmet_req_uninit);
548
549 static inline bool nvmet_cc_en(u32 cc)
550 {
551 return (cc >> NVME_CC_EN_SHIFT) & 0x1;
552 }
553
554 static inline u8 nvmet_cc_css(u32 cc)
555 {
556 return (cc >> NVME_CC_CSS_SHIFT) & 0x7;
557 }
558
559 static inline u8 nvmet_cc_mps(u32 cc)
560 {
561 return (cc >> NVME_CC_MPS_SHIFT) & 0xf;
562 }
563
564 static inline u8 nvmet_cc_ams(u32 cc)
565 {
566 return (cc >> NVME_CC_AMS_SHIFT) & 0x7;
567 }
568
569 static inline u8 nvmet_cc_shn(u32 cc)
570 {
571 return (cc >> NVME_CC_SHN_SHIFT) & 0x3;
572 }
573
574 static inline u8 nvmet_cc_iosqes(u32 cc)
575 {
576 return (cc >> NVME_CC_IOSQES_SHIFT) & 0xf;
577 }
578
579 static inline u8 nvmet_cc_iocqes(u32 cc)
580 {
581 return (cc >> NVME_CC_IOCQES_SHIFT) & 0xf;
582 }
583
584 static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
585 {
586 lockdep_assert_held(&ctrl->lock);
587
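	/*
	 * Accept only the NVM command set (CSS = 0), round-robin arbitration
	 * (AMS = 0), a 4k memory page size (MPS = 0) and the fixed fabrics
	 * queue entry sizes; anything else sets CSTS.CFS instead of CSTS.RDY.
	 */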
588 if (nvmet_cc_iosqes(ctrl->cc) != NVME_NVM_IOSQES ||
589 nvmet_cc_iocqes(ctrl->cc) != NVME_NVM_IOCQES ||
590 nvmet_cc_mps(ctrl->cc) != 0 ||
591 nvmet_cc_ams(ctrl->cc) != 0 ||
592 nvmet_cc_css(ctrl->cc) != 0) {
593 ctrl->csts = NVME_CSTS_CFS;
594 return;
595 }
596
597 ctrl->csts = NVME_CSTS_RDY;
598 }
599
600 static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl)
601 {
602 lockdep_assert_held(&ctrl->lock);
603
604 /* XXX: tear down queues? */
605 ctrl->csts &= ~NVME_CSTS_RDY;
606 ctrl->cc = 0;
607 }
608
609 void nvmet_update_cc(struct nvmet_ctrl *ctrl, u32 new)
610 {
611 u32 old;
612
613 mutex_lock(&ctrl->lock);
614 old = ctrl->cc;
615 ctrl->cc = new;
616
617 if (nvmet_cc_en(new) && !nvmet_cc_en(old))
618 nvmet_start_ctrl(ctrl);
619 if (!nvmet_cc_en(new) && nvmet_cc_en(old))
620 nvmet_clear_ctrl(ctrl);
621 if (nvmet_cc_shn(new) && !nvmet_cc_shn(old)) {
622 nvmet_clear_ctrl(ctrl);
623 ctrl->csts |= NVME_CSTS_SHST_CMPLT;
624 }
625 if (!nvmet_cc_shn(new) && nvmet_cc_shn(old))
626 ctrl->csts &= ~NVME_CSTS_SHST_CMPLT;
627 mutex_unlock(&ctrl->lock);
628 }
629
630 static void nvmet_init_cap(struct nvmet_ctrl *ctrl)
631 {
632	/* command sets supported: NVM command set only: */
633 ctrl->cap = (1ULL << 37);
634 /* CC.EN timeout in 500msec units: */
635 ctrl->cap |= (15ULL << 24);
636 /* maximum queue entries supported: */
637 ctrl->cap |= NVMET_QUEUE_SIZE - 1;
638 }
639
640 u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid,
641 struct nvmet_req *req, struct nvmet_ctrl **ret)
642 {
643 struct nvmet_subsys *subsys;
644 struct nvmet_ctrl *ctrl;
645 u16 status = 0;
646
647 subsys = nvmet_find_get_subsys(req->port, subsysnqn);
648 if (!subsys) {
649 pr_warn("connect request for invalid subsystem %s!\n",
650 subsysnqn);
651 req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
652 return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
653 }
654
655 mutex_lock(&subsys->lock);
656 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
657 if (ctrl->cntlid == cntlid) {
658 if (strncmp(hostnqn, ctrl->hostnqn, NVMF_NQN_SIZE)) {
659 pr_warn("hostnqn mismatch.\n");
660 continue;
661 }
662 if (!kref_get_unless_zero(&ctrl->ref))
663 continue;
664
665 *ret = ctrl;
666 goto out;
667 }
668 }
669
670 pr_warn("could not find controller %d for subsys %s / host %s\n",
671 cntlid, subsysnqn, hostnqn);
672 req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid);
673 status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
674
675 out:
676 mutex_unlock(&subsys->lock);
677 nvmet_subsys_put(subsys);
678 return status;
679 }
680
681 u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd)
682 {
683 if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) {
684 pr_err("got io cmd %d while CC.EN == 0 on qid = %d\n",
685 cmd->common.opcode, req->sq->qid);
686 return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
687 }
688
689 if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) {
690 pr_err("got io cmd %d while CSTS.RDY == 0 on qid = %d\n",
691 cmd->common.opcode, req->sq->qid);
692 req->ns = NULL;
693 return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
694 }
695 return 0;
696 }
697
698 static bool __nvmet_host_allowed(struct nvmet_subsys *subsys,
699 const char *hostnqn)
700 {
701 struct nvmet_host_link *p;
702
703 if (subsys->allow_any_host)
704 return true;
705
706 list_for_each_entry(p, &subsys->hosts, entry) {
707 if (!strcmp(nvmet_host_name(p->host), hostnqn))
708 return true;
709 }
710
711 return false;
712 }
713
714 static bool nvmet_host_discovery_allowed(struct nvmet_req *req,
715 const char *hostnqn)
716 {
717 struct nvmet_subsys_link *s;
718
719 list_for_each_entry(s, &req->port->subsystems, entry) {
720 if (__nvmet_host_allowed(s->subsys, hostnqn))
721 return true;
722 }
723
724 return false;
725 }
726
727 bool nvmet_host_allowed(struct nvmet_req *req, struct nvmet_subsys *subsys,
728 const char *hostnqn)
729 {
730 lockdep_assert_held(&nvmet_config_sem);
731
732 if (subsys->type == NVME_NQN_DISC)
733 return nvmet_host_discovery_allowed(req, hostnqn);
734 else
735 return __nvmet_host_allowed(subsys, hostnqn);
736 }
737
738 u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
739 struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp)
740 {
741 struct nvmet_subsys *subsys;
742 struct nvmet_ctrl *ctrl;
743 int ret;
744 u16 status;
745
746 status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
747 subsys = nvmet_find_get_subsys(req->port, subsysnqn);
748 if (!subsys) {
749 pr_warn("connect request for invalid subsystem %s!\n",
750 subsysnqn);
751 req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
752 goto out;
753 }
754
755 status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
756 down_read(&nvmet_config_sem);
757 if (!nvmet_host_allowed(req, subsys, hostnqn)) {
758 pr_info("connect by host %s for subsystem %s not allowed\n",
759 hostnqn, subsysnqn);
760 req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(hostnqn);
761 up_read(&nvmet_config_sem);
762 status = NVME_SC_CONNECT_INVALID_HOST | NVME_SC_DNR;
763 goto out_put_subsystem;
764 }
765 up_read(&nvmet_config_sem);
766
767 status = NVME_SC_INTERNAL;
768 ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
769 if (!ctrl)
770 goto out_put_subsystem;
771 mutex_init(&ctrl->lock);
772
773 nvmet_init_cap(ctrl);
774
775 INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work);
776 INIT_LIST_HEAD(&ctrl->async_events);
777
778 memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE);
779 memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE);
780
781 kref_init(&ctrl->ref);
782 ctrl->subsys = subsys;
783
784 ctrl->cqs = kcalloc(subsys->max_qid + 1,
785 sizeof(struct nvmet_cq *),
786 GFP_KERNEL);
787 if (!ctrl->cqs)
788 goto out_free_ctrl;
789
790 ctrl->sqs = kcalloc(subsys->max_qid + 1,
791 sizeof(struct nvmet_sq *),
792 GFP_KERNEL);
793 if (!ctrl->sqs)
794 goto out_free_cqs;
795
796 ret = ida_simple_get(&cntlid_ida,
797 NVME_CNTLID_MIN, NVME_CNTLID_MAX,
798 GFP_KERNEL);
799 if (ret < 0) {
800 status = NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR;
801 goto out_free_sqs;
802 }
803 ctrl->cntlid = ret;
804
805 ctrl->ops = req->ops;
806 if (ctrl->subsys->type == NVME_NQN_DISC) {
807 /* Don't accept keep-alive timeout for discovery controllers */
808 if (kato) {
809 status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
810 goto out_free_sqs;
811 }
812
813 /*
814 * Discovery controllers use some arbitrary high value in order
815		 * to clean up stale discovery sessions
816 *
817 * From the latest base diff RC:
818 * "The Keep Alive command is not supported by
819 * Discovery controllers. A transport may specify a
820 * fixed Discovery controller activity timeout value
821 * (e.g., 2 minutes). If no commands are received
822 * by a Discovery controller within that time
823 * period, the controller may perform the
824 * actions for Keep Alive Timer expiration".
825 */
826 ctrl->kato = NVMET_DISC_KATO;
827 } else {
828 /* keep-alive timeout in seconds */
829 ctrl->kato = DIV_ROUND_UP(kato, 1000);
830 }
831 nvmet_start_keep_alive_timer(ctrl);
832
833 mutex_lock(&subsys->lock);
834 list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
835 mutex_unlock(&subsys->lock);
836
837 *ctrlp = ctrl;
838 return 0;
839
840 out_free_sqs:
841 kfree(ctrl->sqs);
842 out_free_cqs:
843 kfree(ctrl->cqs);
844 out_free_ctrl:
845 kfree(ctrl);
846 out_put_subsystem:
847 nvmet_subsys_put(subsys);
848 out:
849 return status;
850 }
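/*
 * Illustrative sketch, not part of this file: the fabrics Connect command
 * handler is the expected caller of nvmet_alloc_ctrl().  "d" and "c" below
 * stand for the Connect data payload and the Connect command and are
 * hypothetical placeholders.
 */
#if 0	/* example only */
	struct nvmet_ctrl *ctrl;
	u16 status;

	status = nvmet_alloc_ctrl(d->subsysnqn, d->hostnqn, req,
			le32_to_cpu(c->kato), &ctrl);
	if (status)
		goto out;	/* status already encodes the connect error */

	/* queue installation elided; report the allocated controller ID */
	req->rsp->result.u16 = cpu_to_le16(ctrl->cntlid);
#endif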
851
852 static void nvmet_ctrl_free(struct kref *ref)
853 {
854 struct nvmet_ctrl *ctrl = container_of(ref, struct nvmet_ctrl, ref);
855 struct nvmet_subsys *subsys = ctrl->subsys;
856
857 nvmet_stop_keep_alive_timer(ctrl);
858
859 mutex_lock(&subsys->lock);
860 list_del(&ctrl->subsys_entry);
861 mutex_unlock(&subsys->lock);
862
863 flush_work(&ctrl->async_event_work);
864 cancel_work_sync(&ctrl->fatal_err_work);
865
866 ida_simple_remove(&cntlid_ida, ctrl->cntlid);
867 nvmet_subsys_put(subsys);
868
869 kfree(ctrl->sqs);
870 kfree(ctrl->cqs);
871 kfree(ctrl);
872 }
873
874 void nvmet_ctrl_put(struct nvmet_ctrl *ctrl)
875 {
876 kref_put(&ctrl->ref, nvmet_ctrl_free);
877 }
878
879 static void nvmet_fatal_error_handler(struct work_struct *work)
880 {
881 struct nvmet_ctrl *ctrl =
882 container_of(work, struct nvmet_ctrl, fatal_err_work);
883
884 pr_err("ctrl %d fatal error occurred!\n", ctrl->cntlid);
885 ctrl->ops->delete_ctrl(ctrl);
886 }
887
888 void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl)
889 {
890 mutex_lock(&ctrl->lock);
891 if (!(ctrl->csts & NVME_CSTS_CFS)) {
892 ctrl->csts |= NVME_CSTS_CFS;
893 INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler);
894 schedule_work(&ctrl->fatal_err_work);
895 }
896 mutex_unlock(&ctrl->lock);
897 }
898 EXPORT_SYMBOL_GPL(nvmet_ctrl_fatal_error);
899
900 static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
901 const char *subsysnqn)
902 {
903 struct nvmet_subsys_link *p;
904
905 if (!port)
906 return NULL;
907
908 if (!strncmp(NVME_DISC_SUBSYS_NAME, subsysnqn,
909 NVMF_NQN_SIZE)) {
910 if (!kref_get_unless_zero(&nvmet_disc_subsys->ref))
911 return NULL;
912 return nvmet_disc_subsys;
913 }
914
915 down_read(&nvmet_config_sem);
916 list_for_each_entry(p, &port->subsystems, entry) {
917 if (!strncmp(p->subsys->subsysnqn, subsysnqn,
918 NVMF_NQN_SIZE)) {
919 if (!kref_get_unless_zero(&p->subsys->ref))
920 break;
921 up_read(&nvmet_config_sem);
922 return p->subsys;
923 }
924 }
925 up_read(&nvmet_config_sem);
926 return NULL;
927 }
928
929 struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
930 enum nvme_subsys_type type)
931 {
932 struct nvmet_subsys *subsys;
933
934 subsys = kzalloc(sizeof(*subsys), GFP_KERNEL);
935 if (!subsys)
936 return NULL;
937
938 subsys->ver = NVME_VS(1, 3, 0); /* NVMe 1.3.0 */
939 /* generate a random serial number as our controllers are ephemeral: */
940 get_random_bytes(&subsys->serial, sizeof(subsys->serial));
941
942 switch (type) {
943 case NVME_NQN_NVME:
944 subsys->max_qid = NVMET_NR_QUEUES;
945 break;
946 case NVME_NQN_DISC:
947 subsys->max_qid = 0;
948 break;
949 default:
950 pr_err("%s: Unknown Subsystem type - %d\n", __func__, type);
951 kfree(subsys);
952 return NULL;
953 }
954 subsys->type = type;
955 subsys->subsysnqn = kstrndup(subsysnqn, NVMF_NQN_SIZE,
956 GFP_KERNEL);
957 if (!subsys->subsysnqn) {
958 kfree(subsys);
959 return NULL;
960 }
961
962 kref_init(&subsys->ref);
963
964 mutex_init(&subsys->lock);
965 INIT_LIST_HEAD(&subsys->namespaces);
966 INIT_LIST_HEAD(&subsys->ctrls);
967 INIT_LIST_HEAD(&subsys->hosts);
968
969 return subsys;
970 }
971
972 static void nvmet_subsys_free(struct kref *ref)
973 {
974 struct nvmet_subsys *subsys =
975 container_of(ref, struct nvmet_subsys, ref);
976
977 WARN_ON_ONCE(!list_empty(&subsys->namespaces));
978
979 kfree(subsys->subsysnqn);
980 kfree(subsys);
981 }
982
983 void nvmet_subsys_del_ctrls(struct nvmet_subsys *subsys)
984 {
985 struct nvmet_ctrl *ctrl;
986
987 mutex_lock(&subsys->lock);
988 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
989 ctrl->ops->delete_ctrl(ctrl);
990 mutex_unlock(&subsys->lock);
991 }
992
993 void nvmet_subsys_put(struct nvmet_subsys *subsys)
994 {
995 kref_put(&subsys->ref, nvmet_subsys_free);
996 }
997
998 static int __init nvmet_init(void)
999 {
1000 int error;
1001
1002 error = nvmet_init_discovery();
1003 if (error)
1004 goto out;
1005
1006 error = nvmet_init_configfs();
1007 if (error)
1008 goto out_exit_discovery;
1009 return 0;
1010
1011 out_exit_discovery:
1012 nvmet_exit_discovery();
1013 out:
1014 return error;
1015 }
1016
1017 static void __exit nvmet_exit(void)
1018 {
1019 nvmet_exit_configfs();
1020 nvmet_exit_discovery();
1021 ida_destroy(&cntlid_ida);
1022
1023 BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024);
1024 BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024);
1025 }
1026
1027 module_init(nvmet_init);
1028 module_exit(nvmet_exit);
1029
1030 MODULE_LICENSE("GPL v2");