/*
 * Common code for the NVMe target.
 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/random.h>
#include <linux/rculist.h>

#include "nvmet.h"

static struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
static DEFINE_IDA(cntlid_ida);

/*
 * This read/write semaphore is used to synchronize access to configuration
 * information on a target system that will result in discovery log page
 * information change for at least one host.
 * The full list of resources protected by this semaphore is:
 *
 *  - subsystems list
 *  - per-subsystem allowed hosts list
 *  - allow_any_host subsystem attribute
 *  - nvmet_genctr
 *  - the nvmet_transports array
 *
 * When updating any of those lists/structures the write lock should be
 * obtained, while when reading (populating the discovery log page or checking
 * a host-subsystem link) the read lock is obtained to allow concurrent reads.
 */
DECLARE_RWSEM(nvmet_config_sem);

static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
		const char *subsysnqn);

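/*
 * Copy a payload between a kernel buffer and the request's scatterlist;
 * a short copy means the SGL does not cover the requested range, which is
 * reported back as an SGL data error with Do Not Retry set.
 */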
u16 nvmet_copy_to_sgl(struct nvmet_req *req, off_t off, const void *buf,
		size_t len)
{
	if (sg_pcopy_from_buffer(req->sg, req->sg_cnt, buf, len, off) != len)
		return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
	return 0;
}

u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf, size_t len)
{
	if (sg_pcopy_to_buffer(req->sg, req->sg_cnt, buf, len, off) != len)
		return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
	return 0;
}

static u32 nvmet_async_event_result(struct nvmet_async_event *aen)
{
	return aen->event_type | (aen->event_info << 8) | (aen->log_page << 16);
}

static void nvmet_async_events_free(struct nvmet_ctrl *ctrl)
{
	struct nvmet_req *req;

	while (1) {
		mutex_lock(&ctrl->lock);
		if (!ctrl->nr_async_event_cmds) {
			mutex_unlock(&ctrl->lock);
			return;
		}

		req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
		mutex_unlock(&ctrl->lock);
		nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR);
	}
}

static void nvmet_async_event_work(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl =
		container_of(work, struct nvmet_ctrl, async_event_work);
	struct nvmet_async_event *aen;
	struct nvmet_req *req;

	while (1) {
		mutex_lock(&ctrl->lock);
		aen = list_first_entry_or_null(&ctrl->async_events,
				struct nvmet_async_event, entry);
		if (!aen || !ctrl->nr_async_event_cmds) {
			mutex_unlock(&ctrl->lock);
			return;
		}

		req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
		nvmet_set_result(req, nvmet_async_event_result(aen));

		list_del(&aen->entry);
		kfree(aen);

		mutex_unlock(&ctrl->lock);
		nvmet_req_complete(req, 0);
	}
}

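/*
 * Queue an asynchronous event notification and kick the worker, which
 * pairs queued events with the Asynchronous Event Request commands the
 * host has left outstanding on the admin queue.
 */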
static void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
		u8 event_info, u8 log_page)
{
	struct nvmet_async_event *aen;

	aen = kmalloc(sizeof(*aen), GFP_KERNEL);
	if (!aen)
		return;

	aen->event_type = event_type;
	aen->event_info = event_info;
	aen->log_page = log_page;

	mutex_lock(&ctrl->lock);
	list_add_tail(&aen->entry, &ctrl->async_events);
	mutex_unlock(&ctrl->lock);

	schedule_work(&ctrl->async_event_work);
}

int nvmet_register_transport(struct nvmet_fabrics_ops *ops)
{
	int ret = 0;

	down_write(&nvmet_config_sem);
	if (nvmet_transports[ops->type])
		ret = -EINVAL;
	else
		nvmet_transports[ops->type] = ops;
	up_write(&nvmet_config_sem);

	return ret;
}
EXPORT_SYMBOL_GPL(nvmet_register_transport);

void nvmet_unregister_transport(struct nvmet_fabrics_ops *ops)
{
	down_write(&nvmet_config_sem);
	nvmet_transports[ops->type] = NULL;
	up_write(&nvmet_config_sem);
}
EXPORT_SYMBOL_GPL(nvmet_unregister_transport);

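/*
 * Bind a configured port to its fabrics transport.  If the transport module
 * is not loaded yet, drop nvmet_config_sem around request_module() so the
 * module's init code can register itself, then retry the lookup.
 */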
int nvmet_enable_port(struct nvmet_port *port)
{
	struct nvmet_fabrics_ops *ops;
	int ret;

	lockdep_assert_held(&nvmet_config_sem);

	ops = nvmet_transports[port->disc_addr.trtype];
	if (!ops) {
		up_write(&nvmet_config_sem);
		request_module("nvmet-transport-%d", port->disc_addr.trtype);
		down_write(&nvmet_config_sem);
		ops = nvmet_transports[port->disc_addr.trtype];
		if (!ops) {
			pr_err("transport type %d not supported\n",
				port->disc_addr.trtype);
			return -EINVAL;
		}
	}

	if (!try_module_get(ops->owner))
		return -EINVAL;

	ret = ops->add_port(port);
	if (ret) {
		module_put(ops->owner);
		return ret;
	}

	port->enabled = true;
	return 0;
}

void nvmet_disable_port(struct nvmet_port *port)
{
	struct nvmet_fabrics_ops *ops;

	lockdep_assert_held(&nvmet_config_sem);

	port->enabled = false;

	ops = nvmet_transports[port->disc_addr.trtype];
	ops->remove_port(port);
	module_put(ops->owner);
}

static void nvmet_keep_alive_timer(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl = container_of(to_delayed_work(work),
			struct nvmet_ctrl, ka_work);

	pr_err("ctrl %d keep-alive timer (%d seconds) expired!\n",
		ctrl->cntlid, ctrl->kato);

	nvmet_ctrl_fatal_error(ctrl);
}

static void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
{
	pr_debug("ctrl %d start keep-alive timer for %d secs\n",
		ctrl->cntlid, ctrl->kato);

	INIT_DELAYED_WORK(&ctrl->ka_work, nvmet_keep_alive_timer);
	schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
}

static void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl)
{
	pr_debug("ctrl %d stop keep-alive\n", ctrl->cntlid);

	cancel_delayed_work_sync(&ctrl->ka_work);
}

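/*
 * Namespace lookup walks the subsystem's RCU-protected list; the non-RCU
 * caller below additionally takes a percpu reference so the namespace stays
 * alive while a request is using it.
 */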
static struct nvmet_ns *__nvmet_find_namespace(struct nvmet_ctrl *ctrl,
		__le32 nsid)
{
	struct nvmet_ns *ns;

	list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) {
		if (ns->nsid == le32_to_cpu(nsid))
			return ns;
	}

	return NULL;
}

struct nvmet_ns *nvmet_find_namespace(struct nvmet_ctrl *ctrl, __le32 nsid)
{
	struct nvmet_ns *ns;

	rcu_read_lock();
	ns = __nvmet_find_namespace(ctrl, nsid);
	if (ns)
		percpu_ref_get(&ns->ref);
	rcu_read_unlock();

	return ns;
}

static void nvmet_destroy_namespace(struct percpu_ref *ref)
{
	struct nvmet_ns *ns = container_of(ref, struct nvmet_ns, ref);

	complete(&ns->disable_done);
}

void nvmet_put_namespace(struct nvmet_ns *ns)
{
	percpu_ref_put(&ns->ref);
}

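/*
 * Enable a namespace: open its backing block device, set up the percpu
 * reference, insert it into the subsystem's nsid-sorted list and notify
 * connected controllers via an asynchronous event.
 */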
int nvmet_ns_enable(struct nvmet_ns *ns)
{
	struct nvmet_subsys *subsys = ns->subsys;
	struct nvmet_ctrl *ctrl;
	int ret = 0;

	mutex_lock(&subsys->lock);
	if (ns->enabled)
		goto out_unlock;

	ns->bdev = blkdev_get_by_path(ns->device_path, FMODE_READ | FMODE_WRITE,
			NULL);
	if (IS_ERR(ns->bdev)) {
		pr_err("failed to open block device %s: (%ld)\n",
			ns->device_path, PTR_ERR(ns->bdev));
		ret = PTR_ERR(ns->bdev);
		ns->bdev = NULL;
		goto out_unlock;
	}

	ns->size = i_size_read(ns->bdev->bd_inode);
	ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));

	ret = percpu_ref_init(&ns->ref, nvmet_destroy_namespace,
				0, GFP_KERNEL);
	if (ret)
		goto out_blkdev_put;

	if (ns->nsid > subsys->max_nsid)
		subsys->max_nsid = ns->nsid;

	/*
	 * The namespaces list needs to be sorted to simplify the
	 * implementation of the Identify Namespace List subcommand.
	 */
	if (list_empty(&subsys->namespaces)) {
		list_add_tail_rcu(&ns->dev_link, &subsys->namespaces);
	} else {
		struct nvmet_ns *old;

		list_for_each_entry_rcu(old, &subsys->namespaces, dev_link) {
			BUG_ON(ns->nsid == old->nsid);
			if (ns->nsid < old->nsid)
				break;
		}

		list_add_tail_rcu(&ns->dev_link, &old->dev_link);
	}

	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, 0, 0);

	ns->enabled = true;
	ret = 0;
out_unlock:
	mutex_unlock(&subsys->lock);
	return ret;
out_blkdev_put:
	blkdev_put(ns->bdev, FMODE_WRITE | FMODE_READ);
	ns->bdev = NULL;
	goto out_unlock;
}

void nvmet_ns_disable(struct nvmet_ns *ns)
{
	struct nvmet_subsys *subsys = ns->subsys;
	struct nvmet_ctrl *ctrl;

	mutex_lock(&subsys->lock);
	if (!ns->enabled)
		goto out_unlock;

	ns->enabled = false;
	list_del_rcu(&ns->dev_link);
	mutex_unlock(&subsys->lock);

	/*
	 * Now that we removed the namespace from the lookup list, we
	 * can kill the per_cpu ref and wait for any remaining references
	 * to be dropped, as well as an RCU grace period for anyone only
	 * using the namespace under rcu_read_lock().  Note that we can't
	 * use call_rcu here as we need to ensure the namespaces have
	 * been fully destroyed before unloading the module.
	 */
	percpu_ref_kill(&ns->ref);
	synchronize_rcu();
	wait_for_completion(&ns->disable_done);
	percpu_ref_exit(&ns->ref);

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, 0, 0);

	if (ns->bdev)
		blkdev_put(ns->bdev, FMODE_WRITE | FMODE_READ);
out_unlock:
	mutex_unlock(&subsys->lock);
}

void nvmet_ns_free(struct nvmet_ns *ns)
{
	nvmet_ns_disable(ns);

	kfree(ns->device_path);
	kfree(ns);
}

struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
{
	struct nvmet_ns *ns;

	ns = kzalloc(sizeof(*ns), GFP_KERNEL);
	if (!ns)
		return NULL;

	INIT_LIST_HEAD(&ns->dev_link);
	init_completion(&ns->disable_done);

	ns->nsid = nsid;
	ns->subsys = subsys;

	return ns;
}

static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
{
	if (status)
		nvmet_set_status(req, status);

	/* XXX: need to fill in something useful for sq_head */
	req->rsp->sq_head = 0;
	if (likely(req->sq)) /* may happen during early failure */
		req->rsp->sq_id = cpu_to_le16(req->sq->qid);
	req->rsp->command_id = req->cmd->common.command_id;

	if (req->ns)
		nvmet_put_namespace(req->ns);
	req->ops->queue_response(req);
}

void nvmet_req_complete(struct nvmet_req *req, u16 status)
{
	__nvmet_req_complete(req, status);
	percpu_ref_put(&req->sq->ref);
}
EXPORT_SYMBOL_GPL(nvmet_req_complete);

void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq,
		u16 qid, u16 size)
{
	cq->qid = qid;
	cq->size = size;

	ctrl->cqs[qid] = cq;
}

void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
		u16 qid, u16 size)
{
	sq->qid = qid;
	sq->size = size;

	ctrl->sqs[qid] = sq;
}

void nvmet_sq_destroy(struct nvmet_sq *sq)
{
	/*
	 * If this is the admin queue, complete all AERs so that our
	 * queue doesn't have outstanding requests on it.
	 */
	if (sq->ctrl && sq->ctrl->sqs && sq->ctrl->sqs[0] == sq)
		nvmet_async_events_free(sq->ctrl);
	percpu_ref_kill(&sq->ref);
	wait_for_completion(&sq->free_done);
	percpu_ref_exit(&sq->ref);

	if (sq->ctrl) {
		nvmet_ctrl_put(sq->ctrl);
		sq->ctrl = NULL; /* allows reusing the queue later */
	}
}
EXPORT_SYMBOL_GPL(nvmet_sq_destroy);

static void nvmet_sq_free(struct percpu_ref *ref)
{
	struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);

	complete(&sq->free_done);
}

int nvmet_sq_init(struct nvmet_sq *sq)
{
	int ret;

	ret = percpu_ref_init(&sq->ref, nvmet_sq_free, 0, GFP_KERNEL);
	if (ret) {
		pr_err("percpu_ref init failed!\n");
		return ret;
	}
	init_completion(&sq->free_done);

	return 0;
}
EXPORT_SYMBOL_GPL(nvmet_sq_init);

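/*
 * Validate and dispatch a freshly received command: reject fused commands
 * and unsupported SGL types, route the command to the connect, I/O, fabrics,
 * discovery or admin parser, and take a submission queue reference that is
 * dropped again in nvmet_req_complete().
 */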
bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
		struct nvmet_sq *sq, struct nvmet_fabrics_ops *ops)
{
	u8 flags = req->cmd->common.flags;
	u16 status;

	req->cq = cq;
	req->sq = sq;
	req->ops = ops;
	req->sg = NULL;
	req->sg_cnt = 0;
	req->rsp->status = 0;

	/* no support for fused commands yet */
	if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) {
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		goto fail;
	}

	/* either variant of SGLs is fine, as we don't support metadata */
	if (unlikely((flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METABUF &&
		     (flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METASEG)) {
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		goto fail;
	}

	if (unlikely(!req->sq->ctrl))
		/* will return an error for any non-connect command: */
		status = nvmet_parse_connect_cmd(req);
	else if (likely(req->sq->qid != 0))
		status = nvmet_parse_io_cmd(req);
	else if (req->cmd->common.opcode == nvme_fabrics_command)
		status = nvmet_parse_fabrics_cmd(req);
	else if (req->sq->ctrl->subsys->type == NVME_NQN_DISC)
		status = nvmet_parse_discovery_cmd(req);
	else
		status = nvmet_parse_admin_cmd(req);

	if (status)
		goto fail;

	if (unlikely(!percpu_ref_tryget_live(&sq->ref))) {
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		goto fail;
	}

	return true;

fail:
	__nvmet_req_complete(req, status);
	return false;
}
EXPORT_SYMBOL_GPL(nvmet_req_init);

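/*
 * Helpers extracting the individual fields of the Controller Configuration
 * (CC) property: enable, I/O command set selected, memory page size,
 * arbitration mechanism, shutdown notification and queue entry sizes.
 */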
static inline bool nvmet_cc_en(u32 cc)
{
	return cc & 0x1;
}

static inline u8 nvmet_cc_css(u32 cc)
{
	return (cc >> 4) & 0x7;
}

static inline u8 nvmet_cc_mps(u32 cc)
{
	return (cc >> 7) & 0xf;
}

static inline u8 nvmet_cc_ams(u32 cc)
{
	return (cc >> 11) & 0x7;
}

static inline u8 nvmet_cc_shn(u32 cc)
{
	return (cc >> 14) & 0x3;
}

static inline u8 nvmet_cc_iosqes(u32 cc)
{
	return (cc >> 16) & 0xf;
}

static inline u8 nvmet_cc_iocqes(u32 cc)
{
	return (cc >> 20) & 0xf;
}

static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
{
	lockdep_assert_held(&ctrl->lock);

	if (nvmet_cc_iosqes(ctrl->cc) != NVME_NVM_IOSQES ||
	    nvmet_cc_iocqes(ctrl->cc) != NVME_NVM_IOCQES ||
	    nvmet_cc_mps(ctrl->cc) != 0 ||
	    nvmet_cc_ams(ctrl->cc) != 0 ||
	    nvmet_cc_css(ctrl->cc) != 0) {
		ctrl->csts = NVME_CSTS_CFS;
		return;
	}

	ctrl->csts = NVME_CSTS_RDY;
}

static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl)
{
	lockdep_assert_held(&ctrl->lock);

	/* XXX: tear down queues? */
	ctrl->csts &= ~NVME_CSTS_RDY;
	ctrl->cc = 0;
}

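/*
 * Handle a host write to the CC property: transitions of the enable bit
 * start or reset the controller, and a shutdown notification resets the
 * controller and reports "shutdown processing complete" in CSTS.
 */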
void nvmet_update_cc(struct nvmet_ctrl *ctrl, u32 new)
{
	u32 old;

	mutex_lock(&ctrl->lock);
	old = ctrl->cc;
	ctrl->cc = new;

	if (nvmet_cc_en(new) && !nvmet_cc_en(old))
		nvmet_start_ctrl(ctrl);
	if (!nvmet_cc_en(new) && nvmet_cc_en(old))
		nvmet_clear_ctrl(ctrl);
	if (nvmet_cc_shn(new) && !nvmet_cc_shn(old)) {
		nvmet_clear_ctrl(ctrl);
		ctrl->csts |= NVME_CSTS_SHST_CMPLT;
	}
	if (!nvmet_cc_shn(new) && nvmet_cc_shn(old))
		ctrl->csts &= ~NVME_CSTS_SHST_CMPLT;
	mutex_unlock(&ctrl->lock);
}

static void nvmet_init_cap(struct nvmet_ctrl *ctrl)
{
	/* command sets supported: NVMe command set: */
	ctrl->cap = (1ULL << 37);
	/* CC.EN timeout in 500msec units: */
	ctrl->cap |= (15ULL << 24);
	/* maximum queue entries supported: */
	ctrl->cap |= NVMET_QUEUE_SIZE - 1;
}

u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid,
		struct nvmet_req *req, struct nvmet_ctrl **ret)
{
	struct nvmet_subsys *subsys;
	struct nvmet_ctrl *ctrl;
	u16 status = 0;

	subsys = nvmet_find_get_subsys(req->port, subsysnqn);
	if (!subsys) {
		pr_warn("connect request for invalid subsystem %s!\n",
			subsysnqn);
		req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
		return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
	}

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		if (ctrl->cntlid == cntlid) {
			if (strncmp(hostnqn, ctrl->hostnqn, NVMF_NQN_SIZE)) {
				pr_warn("hostnqn mismatch.\n");
				continue;
			}
			if (!kref_get_unless_zero(&ctrl->ref))
				continue;

			*ret = ctrl;
			goto out;
		}
	}

	pr_warn("could not find controller %d for subsys %s / host %s\n",
		cntlid, subsysnqn, hostnqn);
	req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid);
	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;

out:
	mutex_unlock(&subsys->lock);
	nvmet_subsys_put(subsys);
	return status;
}

static bool __nvmet_host_allowed(struct nvmet_subsys *subsys,
		const char *hostnqn)
{
	struct nvmet_host_link *p;

	if (subsys->allow_any_host)
		return true;

	list_for_each_entry(p, &subsys->hosts, entry) {
		if (!strcmp(nvmet_host_name(p->host), hostnqn))
			return true;
	}

	return false;
}

static bool nvmet_host_discovery_allowed(struct nvmet_req *req,
		const char *hostnqn)
{
	struct nvmet_subsys_link *s;

	list_for_each_entry(s, &req->port->subsystems, entry) {
		if (__nvmet_host_allowed(s->subsys, hostnqn))
			return true;
	}

	return false;
}

bool nvmet_host_allowed(struct nvmet_req *req, struct nvmet_subsys *subsys,
		const char *hostnqn)
{
	lockdep_assert_held(&nvmet_config_sem);

	if (subsys->type == NVME_NQN_DISC)
		return nvmet_host_discovery_allowed(req, hostnqn);
	else
		return __nvmet_host_allowed(subsys, hostnqn);
}

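/*
 * Allocate a new controller for a Connect command: check that the host is
 * allowed to access the subsystem, allocate the queue pointer arrays sized
 * for the subsystem's maximum queue ID, grab a controller ID from the IDA
 * and start the keep-alive timer.
 */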
u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
		struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp)
{
	struct nvmet_subsys *subsys;
	struct nvmet_ctrl *ctrl;
	int ret;
	u16 status;

	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
	subsys = nvmet_find_get_subsys(req->port, subsysnqn);
	if (!subsys) {
		pr_warn("connect request for invalid subsystem %s!\n",
			subsysnqn);
		req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
		goto out;
	}

	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
	down_read(&nvmet_config_sem);
	if (!nvmet_host_allowed(req, subsys, hostnqn)) {
		pr_info("connect by host %s for subsystem %s not allowed\n",
			hostnqn, subsysnqn);
		req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(hostnqn);
		up_read(&nvmet_config_sem);
		goto out_put_subsystem;
	}
	up_read(&nvmet_config_sem);

	status = NVME_SC_INTERNAL;
	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
	if (!ctrl)
		goto out_put_subsystem;
	mutex_init(&ctrl->lock);

	nvmet_init_cap(ctrl);

	INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work);
	INIT_LIST_HEAD(&ctrl->async_events);

	memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE);
	memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE);

	/* generate a random serial number as our controllers are ephemeral: */
	get_random_bytes(&ctrl->serial, sizeof(ctrl->serial));

	kref_init(&ctrl->ref);
	ctrl->subsys = subsys;

	ctrl->cqs = kcalloc(subsys->max_qid + 1,
			sizeof(struct nvmet_cq *),
			GFP_KERNEL);
	if (!ctrl->cqs)
		goto out_free_ctrl;

	ctrl->sqs = kcalloc(subsys->max_qid + 1,
			sizeof(struct nvmet_sq *),
			GFP_KERNEL);
	if (!ctrl->sqs)
		goto out_free_cqs;

	ret = ida_simple_get(&cntlid_ida,
			NVME_CNTLID_MIN, NVME_CNTLID_MAX,
			GFP_KERNEL);
	if (ret < 0) {
		status = NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR;
		goto out_free_sqs;
	}
	ctrl->cntlid = ret;

	ctrl->ops = req->ops;
	if (ctrl->subsys->type == NVME_NQN_DISC) {
		/* Don't accept keep-alive timeout for discovery controllers */
		if (kato) {
			status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
			goto out_free_sqs;
		}

		/*
		 * Discovery controllers use some arbitrary high value in order
		 * to clean up stale discovery sessions
		 *
		 * From the latest base diff RC:
		 * "The Keep Alive command is not supported by
		 * Discovery controllers. A transport may specify a
		 * fixed Discovery controller activity timeout value
		 * (e.g., 2 minutes). If no commands are received
		 * by a Discovery controller within that time
		 * period, the controller may perform the
		 * actions for Keep Alive Timer expiration".
		 */
		ctrl->kato = NVMET_DISC_KATO;
	} else {
		/* keep-alive timeout in seconds */
		ctrl->kato = DIV_ROUND_UP(kato, 1000);
	}
	nvmet_start_keep_alive_timer(ctrl);

	mutex_lock(&subsys->lock);
	list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
	mutex_unlock(&subsys->lock);

	*ctrlp = ctrl;
	return 0;

out_free_sqs:
	kfree(ctrl->sqs);
out_free_cqs:
	kfree(ctrl->cqs);
out_free_ctrl:
	kfree(ctrl);
out_put_subsystem:
	nvmet_subsys_put(subsys);
out:
	return status;
}

static void nvmet_ctrl_free(struct kref *ref)
{
	struct nvmet_ctrl *ctrl = container_of(ref, struct nvmet_ctrl, ref);
	struct nvmet_subsys *subsys = ctrl->subsys;

	nvmet_stop_keep_alive_timer(ctrl);

	mutex_lock(&subsys->lock);
	list_del(&ctrl->subsys_entry);
	mutex_unlock(&subsys->lock);

	flush_work(&ctrl->async_event_work);
	cancel_work_sync(&ctrl->fatal_err_work);

	ida_simple_remove(&cntlid_ida, ctrl->cntlid);
	nvmet_subsys_put(subsys);

	kfree(ctrl->sqs);
	kfree(ctrl->cqs);
	kfree(ctrl);
}

void nvmet_ctrl_put(struct nvmet_ctrl *ctrl)
{
	kref_put(&ctrl->ref, nvmet_ctrl_free);
}

static void nvmet_fatal_error_handler(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl =
		container_of(work, struct nvmet_ctrl, fatal_err_work);

	pr_err("ctrl %d fatal error occurred!\n", ctrl->cntlid);
	ctrl->ops->delete_ctrl(ctrl);
}

void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl)
{
	mutex_lock(&ctrl->lock);
	if (!(ctrl->csts & NVME_CSTS_CFS)) {
		ctrl->csts |= NVME_CSTS_CFS;
		INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler);
		schedule_work(&ctrl->fatal_err_work);
	}
	mutex_unlock(&ctrl->lock);
}
EXPORT_SYMBOL_GPL(nvmet_ctrl_fatal_error);

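/*
 * Resolve a subsystem NQN to a referenced subsystem.  The well-known
 * discovery NQN maps to the global discovery subsystem; everything else is
 * looked up in the list of subsystems exported through this port.
 */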
static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
		const char *subsysnqn)
{
	struct nvmet_subsys_link *p;

	if (!port)
		return NULL;

	if (!strncmp(NVME_DISC_SUBSYS_NAME, subsysnqn,
			NVMF_NQN_SIZE)) {
		if (!kref_get_unless_zero(&nvmet_disc_subsys->ref))
			return NULL;
		return nvmet_disc_subsys;
	}

	down_read(&nvmet_config_sem);
	list_for_each_entry(p, &port->subsystems, entry) {
		if (!strncmp(p->subsys->subsysnqn, subsysnqn,
				NVMF_NQN_SIZE)) {
			if (!kref_get_unless_zero(&p->subsys->ref))
				break;
			up_read(&nvmet_config_sem);
			return p->subsys;
		}
	}
	up_read(&nvmet_config_sem);
	return NULL;
}

struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
		enum nvme_subsys_type type)
{
	struct nvmet_subsys *subsys;

	subsys = kzalloc(sizeof(*subsys), GFP_KERNEL);
	if (!subsys)
		return NULL;

	subsys->ver = NVME_VS(1, 2, 1); /* NVMe 1.2.1 */

	switch (type) {
	case NVME_NQN_NVME:
		subsys->max_qid = NVMET_NR_QUEUES;
		break;
	case NVME_NQN_DISC:
		subsys->max_qid = 0;
		break;
	default:
		pr_err("%s: Unknown Subsystem type - %d\n", __func__, type);
		kfree(subsys);
		return NULL;
	}
	subsys->type = type;
	subsys->subsysnqn = kstrndup(subsysnqn, NVMF_NQN_SIZE,
			GFP_KERNEL);
	if (!subsys->subsysnqn) {
		kfree(subsys);
		return NULL;
	}

	kref_init(&subsys->ref);

	mutex_init(&subsys->lock);
	INIT_LIST_HEAD(&subsys->namespaces);
	INIT_LIST_HEAD(&subsys->ctrls);
	INIT_LIST_HEAD(&subsys->hosts);

	return subsys;
}

static void nvmet_subsys_free(struct kref *ref)
{
	struct nvmet_subsys *subsys =
		container_of(ref, struct nvmet_subsys, ref);

	WARN_ON_ONCE(!list_empty(&subsys->namespaces));

	kfree(subsys->subsysnqn);
	kfree(subsys);
}

void nvmet_subsys_del_ctrls(struct nvmet_subsys *subsys)
{
	struct nvmet_ctrl *ctrl;

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		ctrl->ops->delete_ctrl(ctrl);
	mutex_unlock(&subsys->lock);
}

void nvmet_subsys_put(struct nvmet_subsys *subsys)
{
	kref_put(&subsys->ref, nvmet_subsys_free);
}

static int __init nvmet_init(void)
{
	int error;

	error = nvmet_init_discovery();
	if (error)
		goto out;

	error = nvmet_init_configfs();
	if (error)
		goto out_exit_discovery;
	return 0;

out_exit_discovery:
	nvmet_exit_discovery();
out:
	return error;
}

static void __exit nvmet_exit(void)
{
	nvmet_exit_configfs();
	nvmet_exit_discovery();
	ida_destroy(&cntlid_ida);

	BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024);
	BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024);
}

module_init(nvmet_init);
module_exit(nvmet_exit);

MODULE_LICENSE("GPL v2");