[mirror_ubuntu-bionic-kernel.git] / drivers / nvme / host / core.c (blob at commit "nvme: move timeout variables to core.c")
1 /*
2 * NVM Express device driver
3 * Copyright (c) 2011-2014, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 */
14
15 #include <linux/blkdev.h>
16 #include <linux/blk-mq.h>
17 #include <linux/delay.h>
18 #include <linux/errno.h>
19 #include <linux/hdreg.h>
20 #include <linux/kernel.h>
21 #include <linux/module.h>
22 #include <linux/list_sort.h>
23 #include <linux/slab.h>
24 #include <linux/types.h>
25 #include <linux/pr.h>
26 #include <linux/ptrace.h>
27 #include <linux/nvme_ioctl.h>
28 #include <linux/t10-pi.h>
29 #include <scsi/sg.h>
30 #include <asm/unaligned.h>
31
32 #include "nvme.h"
33
34 #define NVME_MINORS (1U << MINORBITS)
35
36 unsigned char admin_timeout = 60;
37 module_param(admin_timeout, byte, 0644);
38 MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands");
39
40 unsigned char nvme_io_timeout = 30;
41 module_param_named(io_timeout, nvme_io_timeout, byte, 0644);
42 MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O");
43
44 unsigned char shutdown_timeout = 5;
45 module_param(shutdown_timeout, byte, 0644);
46 MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown");
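
/*
 * These timeouts are writable module parameters (0644): they can be set at
 * load time, e.g. "modprobe nvme io_timeout=60" (assuming core.c is linked
 * into the nvme module, as in this tree), or changed later through
 * /sys/module/nvme/parameters/.
 */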
47
48 static int nvme_major;
49 module_param(nvme_major, int, 0);
50
51 static int nvme_char_major;
52 module_param(nvme_char_major, int, 0);
53
54 static LIST_HEAD(nvme_ctrl_list);
55 DEFINE_SPINLOCK(dev_list_lock);
56
57 static struct class *nvme_class;
58
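/*
 * Final kref release for a namespace: unregister any LightNVM target, clear
 * the gendisk's back pointer under dev_list_lock so nvme_get_ns_from_disk()
 * can no longer reach it, then drop the disk and controller references.
 */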
59 static void nvme_free_ns(struct kref *kref)
60 {
61 struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref);
62
63 if (ns->type == NVME_NS_LIGHTNVM)
64 nvme_nvm_unregister(ns->queue, ns->disk->disk_name);
65
66 spin_lock(&dev_list_lock);
67 ns->disk->private_data = NULL;
68 spin_unlock(&dev_list_lock);
69
70 nvme_put_ctrl(ns->ctrl);
71 put_disk(ns->disk);
72 kfree(ns);
73 }
74
75 static void nvme_put_ns(struct nvme_ns *ns)
76 {
77 kref_put(&ns->kref, nvme_free_ns);
78 }
79
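/*
 * Look up the namespace behind a gendisk, taking both a namespace reference
 * and a reference on the controller ops' module. Returns NULL if the
 * namespace is already being torn down or the module is going away.
 */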
80 static struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk)
81 {
82 struct nvme_ns *ns;
83
84 spin_lock(&dev_list_lock);
85 ns = disk->private_data;
86 if (ns) {
87 if (!kref_get_unless_zero(&ns->kref))
88 goto fail;
89 if (!try_module_get(ns->ctrl->ops->module))
90 goto fail_put_ns;
91 }
92 spin_unlock(&dev_list_lock);
93
94 return ns;
95
96 fail_put_ns:
97 kref_put(&ns->kref, nvme_free_ns);
98 fail:
99 spin_unlock(&dev_list_lock);
100 return NULL;
101 }
102
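/*
 * Requeue a request and kick the requeue list, unless the queue has been
 * stopped (e.g. while the controller is being reset).
 */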
103 void nvme_requeue_req(struct request *req)
104 {
105 unsigned long flags;
106
107 blk_mq_requeue_request(req);
108 spin_lock_irqsave(req->q->queue_lock, flags);
109 if (!blk_queue_stopped(req->q))
110 blk_mq_kick_requeue_list(req->q);
111 spin_unlock_irqrestore(req->q->queue_lock, flags);
112 }
113
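/*
 * Allocate a driver-private (REQ_TYPE_DRV_PRIV) request carrying an NVMe
 * command. The data direction is taken from bit 0 of the opcode, and the
 * data fields are cleared so the request carries no payload until the
 * caller maps one.
 */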
114 struct request *nvme_alloc_request(struct request_queue *q,
115 struct nvme_command *cmd, unsigned int flags)
116 {
117 bool write = cmd->common.opcode & 1;
118 struct request *req;
119
120 req = blk_mq_alloc_request(q, write, flags);
121 if (IS_ERR(req))
122 return req;
123
124 req->cmd_type = REQ_TYPE_DRV_PRIV;
125 req->cmd_flags |= REQ_FAILFAST_DRIVER;
126 req->__data_len = 0;
127 req->__sector = (sector_t) -1;
128 req->bio = req->biotail = NULL;
129
130 req->cmd = (unsigned char *)cmd;
131 req->cmd_len = sizeof(struct nvme_command);
132 req->special = (void *)0;
133
134 return req;
135 }
136
137 /*
138 * Returns 0 on success. If the result is negative, it's a Linux error code;
139 * if the result is positive, it's an NVM Express status code
140 */
141 int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
142 void *buffer, unsigned bufflen, u32 *result, unsigned timeout)
143 {
144 struct request *req;
145 int ret;
146
147 req = nvme_alloc_request(q, cmd, 0);
148 if (IS_ERR(req))
149 return PTR_ERR(req);
150
151 req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
152
153 if (buffer && bufflen) {
154 ret = blk_rq_map_kern(q, req, buffer, bufflen, GFP_KERNEL);
155 if (ret)
156 goto out;
157 }
158
159 blk_execute_rq(req->q, NULL, req, 0);
160 if (result)
161 *result = (u32)(uintptr_t)req->special;
162 ret = req->errors;
163 out:
164 blk_mq_free_request(req);
165 return ret;
166 }
167
168 int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
169 void *buffer, unsigned bufflen)
170 {
171 return __nvme_submit_sync_cmd(q, cmd, buffer, bufflen, NULL, 0);
172 }
173
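/*
 * Map a user data buffer (and, optionally, a separate metadata buffer
 * attached as a bio integrity payload) onto a request, execute it
 * synchronously, and copy the metadata back to user space on reads.
 */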
174 int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
175 void __user *ubuffer, unsigned bufflen,
176 void __user *meta_buffer, unsigned meta_len, u32 meta_seed,
177 u32 *result, unsigned timeout)
178 {
179 bool write = cmd->common.opcode & 1;
180 struct nvme_ns *ns = q->queuedata;
181 struct gendisk *disk = ns ? ns->disk : NULL;
182 struct request *req;
183 struct bio *bio = NULL;
184 void *meta = NULL;
185 int ret;
186
187 req = nvme_alloc_request(q, cmd, 0);
188 if (IS_ERR(req))
189 return PTR_ERR(req);
190
191 req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
192
193 if (ubuffer && bufflen) {
194 ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen,
195 GFP_KERNEL);
196 if (ret)
197 goto out;
198 bio = req->bio;
199
200 if (!disk)
201 goto submit;
202 bio->bi_bdev = bdget_disk(disk, 0);
203 if (!bio->bi_bdev) {
204 ret = -ENODEV;
205 goto out_unmap;
206 }
207
208 if (meta_buffer) {
209 struct bio_integrity_payload *bip;
210
211 meta = kmalloc(meta_len, GFP_KERNEL);
212 if (!meta) {
213 ret = -ENOMEM;
214 goto out_unmap;
215 }
216
217 if (write) {
218 if (copy_from_user(meta, meta_buffer,
219 meta_len)) {
220 ret = -EFAULT;
221 goto out_free_meta;
222 }
223 }
224
225 bip = bio_integrity_alloc(bio, GFP_KERNEL, 1);
226 if (IS_ERR(bip)) {
227 ret = PTR_ERR(bip);
228 goto out_free_meta;
229 }
230
231 bip->bip_iter.bi_size = meta_len;
232 bip->bip_iter.bi_sector = meta_seed;
233
234 ret = bio_integrity_add_page(bio, virt_to_page(meta),
235 meta_len, offset_in_page(meta));
236 if (ret != meta_len) {
237 ret = -ENOMEM;
238 goto out_free_meta;
239 }
240 }
241 }
242 submit:
243 blk_execute_rq(req->q, disk, req, 0);
244 ret = req->errors;
245 if (result)
246 *result = (u32)(uintptr_t)req->special;
247 if (meta && !ret && !write) {
248 if (copy_to_user(meta_buffer, meta, meta_len))
249 ret = -EFAULT;
250 }
251 out_free_meta:
252 kfree(meta);
253 out_unmap:
254 if (bio) {
255 if (disk && bio->bi_bdev)
256 bdput(bio->bi_bdev);
257 blk_rq_unmap_user(bio);
258 }
259 out:
260 blk_mq_free_request(req);
261 return ret;
262 }
263
264 int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
265 void __user *ubuffer, unsigned bufflen, u32 *result,
266 unsigned timeout)
267 {
268 return __nvme_submit_user_cmd(q, cmd, ubuffer, bufflen, NULL, 0, 0,
269 result, timeout);
270 }
271
272 int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
273 {
274 struct nvme_command c = { };
275 int error;
276
277 /* gcc-4.4.4 (at least) has issues with initializers and anon unions */
278 c.identify.opcode = nvme_admin_identify;
279 c.identify.cns = cpu_to_le32(1);
280
281 *id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL);
282 if (!*id)
283 return -ENOMEM;
284
285 error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
286 sizeof(struct nvme_id_ctrl));
287 if (error)
288 kfree(*id);
289 return error;
290 }
291
292 static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *ns_list)
293 {
294 struct nvme_command c = { };
295
296 c.identify.opcode = nvme_admin_identify;
297 c.identify.cns = cpu_to_le32(2);
298 c.identify.nsid = cpu_to_le32(nsid);
299 return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000);
300 }
301
302 int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
303 struct nvme_id_ns **id)
304 {
305 struct nvme_command c = { };
306 int error;
307
308 /* gcc-4.4.4 (at least) has issues with initializers and anon unions */
309 c.identify.opcode = nvme_admin_identify;
310 c.identify.nsid = cpu_to_le32(nsid);
311
312 *id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL);
313 if (!*id)
314 return -ENOMEM;
315
316 error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
317 sizeof(struct nvme_id_ns));
318 if (error)
319 kfree(*id);
320 return error;
321 }
322
323 int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
324 dma_addr_t dma_addr, u32 *result)
325 {
326 struct nvme_command c;
327
328 memset(&c, 0, sizeof(c));
329 c.features.opcode = nvme_admin_get_features;
330 c.features.nsid = cpu_to_le32(nsid);
331 c.features.prp1 = cpu_to_le64(dma_addr);
332 c.features.fid = cpu_to_le32(fid);
333
334 return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0, result, 0);
335 }
336
337 int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
338 dma_addr_t dma_addr, u32 *result)
339 {
340 struct nvme_command c;
341
342 memset(&c, 0, sizeof(c));
343 c.features.opcode = nvme_admin_set_features;
344 c.features.prp1 = cpu_to_le64(dma_addr);
345 c.features.fid = cpu_to_le32(fid);
346 c.features.dword11 = cpu_to_le32(dword11);
347
348 return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0, result, 0);
349 }
350
351 int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log)
352 {
353 struct nvme_command c = { };
354 int error;
355
356 c.common.opcode = nvme_admin_get_log_page;
357 c.common.nsid = cpu_to_le32(0xFFFFFFFF);
358 c.common.cdw10[0] = cpu_to_le32(
359 (((sizeof(struct nvme_smart_log) / 4) - 1) << 16) |
360 NVME_LOG_SMART);
361
362 *log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL);
363 if (!*log)
364 return -ENOMEM;
365
366 error = nvme_submit_sync_cmd(dev->admin_q, &c, *log,
367 sizeof(struct nvme_smart_log));
368 if (error)
369 kfree(*log);
370 return error;
371 }
372
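/*
 * Request *count I/O queues via Set Features (Number of Queues). The
 * completion result holds the 0-based counts of submission and completion
 * queues actually allocated; *count is clamped to the smaller of the two.
 */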
373 int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
374 {
375 u32 q_count = (*count - 1) | ((*count - 1) << 16);
376 u32 result;
377 int status, nr_io_queues;
378
379 status = nvme_set_features(ctrl, NVME_FEAT_NUM_QUEUES, q_count, 0,
380 &result);
381 if (status)
382 return status;
383
384 nr_io_queues = min(result & 0xffff, result >> 16) + 1;
385 *count = min(*count, nr_io_queues);
386 return 0;
387 }
388
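/*
 * Handler for NVME_IOCTL_SUBMIT_IO: validate the opcode, derive the data
 * and metadata lengths from the namespace format (metadata is folded into
 * the data buffer for extended-LBA formats), and submit the command via
 * __nvme_submit_user_cmd().
 */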
389 static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
390 {
391 struct nvme_user_io io;
392 struct nvme_command c;
393 unsigned length, meta_len;
394 void __user *metadata;
395
396 if (copy_from_user(&io, uio, sizeof(io)))
397 return -EFAULT;
398
399 switch (io.opcode) {
400 case nvme_cmd_write:
401 case nvme_cmd_read:
402 case nvme_cmd_compare:
403 break;
404 default:
405 return -EINVAL;
406 }
407
408 length = (io.nblocks + 1) << ns->lba_shift;
409 meta_len = (io.nblocks + 1) * ns->ms;
410 metadata = (void __user *)(uintptr_t)io.metadata;
411
412 if (ns->ext) {
413 length += meta_len;
414 meta_len = 0;
415 } else if (meta_len) {
416 if ((io.metadata & 3) || !io.metadata)
417 return -EINVAL;
418 }
419
420 memset(&c, 0, sizeof(c));
421 c.rw.opcode = io.opcode;
422 c.rw.flags = io.flags;
423 c.rw.nsid = cpu_to_le32(ns->ns_id);
424 c.rw.slba = cpu_to_le64(io.slba);
425 c.rw.length = cpu_to_le16(io.nblocks);
426 c.rw.control = cpu_to_le16(io.control);
427 c.rw.dsmgmt = cpu_to_le32(io.dsmgmt);
428 c.rw.reftag = cpu_to_le32(io.reftag);
429 c.rw.apptag = cpu_to_le16(io.apptag);
430 c.rw.appmask = cpu_to_le16(io.appmask);
431
432 return __nvme_submit_user_cmd(ns->queue, &c,
433 (void __user *)(uintptr_t)io.addr, length,
434 metadata, meta_len, io.slba, NULL, 0);
435 }
436
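/*
 * Generic passthrough ioctl, admin or I/O. Requires CAP_SYS_ADMIN; the
 * command is copied from user space and submitted on the admin queue or,
 * when a namespace is supplied, on that namespace's I/O queue.
 */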
437 static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
438 struct nvme_passthru_cmd __user *ucmd)
439 {
440 struct nvme_passthru_cmd cmd;
441 struct nvme_command c;
442 unsigned timeout = 0;
443 int status;
444
445 if (!capable(CAP_SYS_ADMIN))
446 return -EACCES;
447 if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
448 return -EFAULT;
449
450 memset(&c, 0, sizeof(c));
451 c.common.opcode = cmd.opcode;
452 c.common.flags = cmd.flags;
453 c.common.nsid = cpu_to_le32(cmd.nsid);
454 c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
455 c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
456 c.common.cdw10[0] = cpu_to_le32(cmd.cdw10);
457 c.common.cdw10[1] = cpu_to_le32(cmd.cdw11);
458 c.common.cdw10[2] = cpu_to_le32(cmd.cdw12);
459 c.common.cdw10[3] = cpu_to_le32(cmd.cdw13);
460 c.common.cdw10[4] = cpu_to_le32(cmd.cdw14);
461 c.common.cdw10[5] = cpu_to_le32(cmd.cdw15);
462
463 if (cmd.timeout_ms)
464 timeout = msecs_to_jiffies(cmd.timeout_ms);
465
466 status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
467 (void __user *)(uintptr_t)cmd.addr, cmd.data_len,
468 &cmd.result, timeout);
469 if (status >= 0) {
470 if (put_user(cmd.result, &ucmd->result))
471 return -EFAULT;
472 }
473
474 return status;
475 }
476
477 static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
478 unsigned int cmd, unsigned long arg)
479 {
480 struct nvme_ns *ns = bdev->bd_disk->private_data;
481
482 switch (cmd) {
483 case NVME_IOCTL_ID:
484 force_successful_syscall_return();
485 return ns->ns_id;
486 case NVME_IOCTL_ADMIN_CMD:
487 return nvme_user_cmd(ns->ctrl, NULL, (void __user *)arg);
488 case NVME_IOCTL_IO_CMD:
489 return nvme_user_cmd(ns->ctrl, ns, (void __user *)arg);
490 case NVME_IOCTL_SUBMIT_IO:
491 return nvme_submit_io(ns, (void __user *)arg);
492 #ifdef CONFIG_BLK_DEV_NVME_SCSI
493 case SG_GET_VERSION_NUM:
494 return nvme_sg_get_version_num((void __user *)arg);
495 case SG_IO:
496 return nvme_sg_io(ns, (void __user *)arg);
497 #endif
498 default:
499 return -ENOTTY;
500 }
501 }
502
503 #ifdef CONFIG_COMPAT
504 static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode,
505 unsigned int cmd, unsigned long arg)
506 {
507 switch (cmd) {
508 case SG_IO:
509 return -ENOIOCTLCMD;
510 }
511 return nvme_ioctl(bdev, mode, cmd, arg);
512 }
513 #else
514 #define nvme_compat_ioctl NULL
515 #endif
516
517 static int nvme_open(struct block_device *bdev, fmode_t mode)
518 {
519 return nvme_get_ns_from_disk(bdev->bd_disk) ? 0 : -ENXIO;
520 }
521
522 static void nvme_release(struct gendisk *disk, fmode_t mode)
523 {
524 struct nvme_ns *ns = disk->private_data;
525
526 module_put(ns->ctrl->ops->module);
527 nvme_put_ns(ns);
528 }
529
530 static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
531 {
532 /* some standard values */
533 geo->heads = 1 << 6;
534 geo->sectors = 1 << 5;
535 geo->cylinders = get_capacity(bdev->bd_disk) >> 11;
536 return 0;
537 }
538
539 #ifdef CONFIG_BLK_DEV_INTEGRITY
540 static void nvme_init_integrity(struct nvme_ns *ns)
541 {
542 struct blk_integrity integrity;
543
544 switch (ns->pi_type) {
545 case NVME_NS_DPS_PI_TYPE3:
546 integrity.profile = &t10_pi_type3_crc;
547 break;
548 case NVME_NS_DPS_PI_TYPE1:
549 case NVME_NS_DPS_PI_TYPE2:
550 integrity.profile = &t10_pi_type1_crc;
551 break;
552 default:
553 integrity.profile = NULL;
554 break;
555 }
556 integrity.tuple_size = ns->ms;
557 blk_integrity_register(ns->disk, &integrity);
558 blk_queue_max_integrity_segments(ns->queue, 1);
559 }
560 #else
561 static void nvme_init_integrity(struct nvme_ns *ns)
562 {
563 }
564 #endif /* CONFIG_BLK_DEV_INTEGRITY */
565
566 static void nvme_config_discard(struct nvme_ns *ns)
567 {
568 u32 logical_block_size = queue_logical_block_size(ns->queue);
569 ns->queue->limits.discard_zeroes_data = 0;
570 ns->queue->limits.discard_alignment = logical_block_size;
571 ns->queue->limits.discard_granularity = logical_block_size;
572 blk_queue_max_discard_sectors(ns->queue, 0xffffffff);
573 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue);
574 }
575
576 static int nvme_revalidate_disk(struct gendisk *disk)
577 {
578 struct nvme_ns *ns = disk->private_data;
579 struct nvme_id_ns *id;
580 u8 lbaf, pi_type;
581 u16 old_ms;
582 unsigned short bs;
583
584 if (nvme_identify_ns(ns->ctrl, ns->ns_id, &id)) {
585 dev_warn(disk_to_dev(ns->disk), "%s: Identify failure\n",
586 __func__);
587 return -ENODEV;
588 }
589 if (id->ncap == 0) {
590 kfree(id);
591 return -ENODEV;
592 }
593
594 if (nvme_nvm_ns_supported(ns, id) && ns->type != NVME_NS_LIGHTNVM) {
595 if (nvme_nvm_register(ns->queue, disk->disk_name)) {
596 dev_warn(disk_to_dev(ns->disk),
597 "%s: LightNVM init failure\n", __func__);
598 kfree(id);
599 return -ENODEV;
600 }
601 ns->type = NVME_NS_LIGHTNVM;
602 }
603
604 if (ns->ctrl->vs >= NVME_VS(1, 1))
605 memcpy(ns->eui, id->eui64, sizeof(ns->eui));
606 if (ns->ctrl->vs >= NVME_VS(1, 2))
607 memcpy(ns->uuid, id->nguid, sizeof(ns->uuid));
608
609 old_ms = ns->ms;
610 lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
611 ns->lba_shift = id->lbaf[lbaf].ds;
612 ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
613 ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);
614
615 /*
616 * If the namespace reports a zero LBA data size, default to 512 bytes so
617 * the block layer has a usable size before read/write fails for 0 capacity.
618 */
619 if (ns->lba_shift == 0)
620 ns->lba_shift = 9;
621 bs = 1 << ns->lba_shift;
622 /* XXX: PI implementation requires metadata equal to the t10 pi tuple size */
623 pi_type = ns->ms == sizeof(struct t10_pi_tuple) ?
624 id->dps & NVME_NS_DPS_PI_MASK : 0;
625
626 blk_mq_freeze_queue(disk->queue);
627 if (blk_get_integrity(disk) && (ns->pi_type != pi_type ||
628 ns->ms != old_ms ||
629 bs != queue_logical_block_size(disk->queue) ||
630 (ns->ms && ns->ext)))
631 blk_integrity_unregister(disk);
632
633 ns->pi_type = pi_type;
634 blk_queue_logical_block_size(ns->queue, bs);
635
636 if (ns->ms && !blk_get_integrity(disk) && !ns->ext)
637 nvme_init_integrity(ns);
638 if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk))
639 set_capacity(disk, 0);
640 else
641 set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
642
643 if (ns->ctrl->oncs & NVME_CTRL_ONCS_DSM)
644 nvme_config_discard(ns);
645 blk_mq_unfreeze_queue(disk->queue);
646
647 kfree(id);
648 return 0;
649 }
650
651 static char nvme_pr_type(enum pr_type type)
652 {
653 switch (type) {
654 case PR_WRITE_EXCLUSIVE:
655 return 1;
656 case PR_EXCLUSIVE_ACCESS:
657 return 2;
658 case PR_WRITE_EXCLUSIVE_REG_ONLY:
659 return 3;
660 case PR_EXCLUSIVE_ACCESS_REG_ONLY:
661 return 4;
662 case PR_WRITE_EXCLUSIVE_ALL_REGS:
663 return 5;
664 case PR_EXCLUSIVE_ACCESS_ALL_REGS:
665 return 6;
666 default:
667 return 0;
668 }
669 }
670
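/*
 * Build and submit a reservation command: the current and service-action
 * keys are packed into a 16-byte payload, and cdw10 carries the reservation
 * action and type bits for the given opcode.
 */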
671 static int nvme_pr_command(struct block_device *bdev, u32 cdw10,
672 u64 key, u64 sa_key, u8 op)
673 {
674 struct nvme_ns *ns = bdev->bd_disk->private_data;
675 struct nvme_command c;
676 u8 data[16] = { 0, };
677
678 put_unaligned_le64(key, &data[0]);
679 put_unaligned_le64(sa_key, &data[8]);
680
681 memset(&c, 0, sizeof(c));
682 c.common.opcode = op;
683 c.common.nsid = cpu_to_le32(ns->ns_id);
684 c.common.cdw10[0] = cpu_to_le32(cdw10);
685
686 return nvme_submit_sync_cmd(ns->queue, &c, data, 16);
687 }
688
689 static int nvme_pr_register(struct block_device *bdev, u64 old,
690 u64 new, unsigned flags)
691 {
692 u32 cdw10;
693
694 if (flags & ~PR_FL_IGNORE_KEY)
695 return -EOPNOTSUPP;
696
697 cdw10 = old ? 2 : 0;
698 cdw10 |= (flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0;
699 cdw10 |= (1 << 30) | (1 << 31); /* PTPL=1 */
700 return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_register);
701 }
702
703 static int nvme_pr_reserve(struct block_device *bdev, u64 key,
704 enum pr_type type, unsigned flags)
705 {
706 u32 cdw10;
707
708 if (flags & ~PR_FL_IGNORE_KEY)
709 return -EOPNOTSUPP;
710
711 cdw10 = nvme_pr_type(type) << 8;
712 cdw10 |= ((flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0);
713 return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_acquire);
714 }
715
716 static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new,
717 enum pr_type type, bool abort)
718 {
719 u32 cdw10 = nvme_pr_type(type) << 8 | (abort ? 2 : 1);
720 return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire);
721 }
722
723 static int nvme_pr_clear(struct block_device *bdev, u64 key)
724 {
725 u32 cdw10 = 1 | (key ? 1 << 3 : 0);
726 return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_register);
727 }
728
729 static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
730 {
731 u32 cdw10 = nvme_pr_type(type) << 8 | (key ? 1 << 3 : 0);
732 return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release);
733 }
734
735 static const struct pr_ops nvme_pr_ops = {
736 .pr_register = nvme_pr_register,
737 .pr_reserve = nvme_pr_reserve,
738 .pr_release = nvme_pr_release,
739 .pr_preempt = nvme_pr_preempt,
740 .pr_clear = nvme_pr_clear,
741 };
742
743 static const struct block_device_operations nvme_fops = {
744 .owner = THIS_MODULE,
745 .ioctl = nvme_ioctl,
746 .compat_ioctl = nvme_compat_ioctl,
747 .open = nvme_open,
748 .release = nvme_release,
749 .getgeo = nvme_getgeo,
750 .revalidate_disk = nvme_revalidate_disk,
751 .pr_ops = &nvme_pr_ops,
752 };
753
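/*
 * Poll CSTS.RDY until it matches the requested enable state, bounded by the
 * timeout advertised in CAP.TO (units of 500 ms).
 */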
754 static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled)
755 {
756 unsigned long timeout =
757 ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies;
758 u32 csts, bit = enabled ? NVME_CSTS_RDY : 0;
759 int ret;
760
761 while ((ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts)) == 0) {
762 if ((csts & NVME_CSTS_RDY) == bit)
763 break;
764
765 msleep(100);
766 if (fatal_signal_pending(current))
767 return -EINTR;
768 if (time_after(jiffies, timeout)) {
769 dev_err(ctrl->device,
770 "Device not ready; aborting %s\n", enabled ?
771 "initialisation" : "reset");
772 return -ENODEV;
773 }
774 }
775
776 return ret;
777 }
778
779 /*
780 * If the device has been passed off to us in an enabled state, just clear
781 * the enabled bit. The spec says we should set the 'shutdown notification
782 * bits', but doing so may cause the device to complete commands to the
783 * admin queue ... and we don't know what memory that might be pointing at!
784 */
785 int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
786 {
787 int ret;
788
789 ctrl->ctrl_config &= ~NVME_CC_SHN_MASK;
790 ctrl->ctrl_config &= ~NVME_CC_ENABLE;
791
792 ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
793 if (ret)
794 return ret;
795 return nvme_wait_ready(ctrl, cap, false);
796 }
797
798 int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
799 {
800 /*
801 * Default to a 4K page size, with the intention to update this
802 * path in the future to accommodate architectures with differing
803 * kernel and IO page sizes.
804 */
805 unsigned dev_page_min = NVME_CAP_MPSMIN(cap) + 12, page_shift = 12;
806 int ret;
807
808 if (page_shift < dev_page_min) {
809 dev_err(ctrl->device,
810 "Minimum device page size %u too large for host (%u)\n",
811 1 << dev_page_min, 1 << page_shift);
812 return -ENODEV;
813 }
814
815 ctrl->page_size = 1 << page_shift;
816
817 ctrl->ctrl_config = NVME_CC_CSS_NVM;
818 ctrl->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT;
819 ctrl->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE;
820 ctrl->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
821 ctrl->ctrl_config |= NVME_CC_ENABLE;
822
823 ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
824 if (ret)
825 return ret;
826 return nvme_wait_ready(ctrl, cap, true);
827 }
828
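/*
 * Request a normal controller shutdown through CC.SHN and poll CSTS.SHST
 * until the controller reports shutdown complete or SHUTDOWN_TIMEOUT
 * expires.
 */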
829 int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl)
830 {
831 unsigned long timeout = SHUTDOWN_TIMEOUT + jiffies;
832 u32 csts;
833 int ret;
834
835 ctrl->ctrl_config &= ~NVME_CC_SHN_MASK;
836 ctrl->ctrl_config |= NVME_CC_SHN_NORMAL;
837
838 ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
839 if (ret)
840 return ret;
841
842 while ((ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts)) == 0) {
843 if ((csts & NVME_CSTS_SHST_MASK) == NVME_CSTS_SHST_CMPLT)
844 break;
845
846 msleep(100);
847 if (fatal_signal_pending(current))
848 return -EINTR;
849 if (time_after(jiffies, timeout)) {
850 dev_err(ctrl->device,
851 "Device shutdown incomplete; abort shutdown\n");
852 return -ENODEV;
853 }
854 }
855
856 return ret;
857 }
858
859 /*
860 * Initialize the cached copies of the Identify data and various controller
861 * registers in our nvme_ctrl structure. This should be called as soon as
862 * the admin queue is fully up and running.
863 */
864 int nvme_init_identify(struct nvme_ctrl *ctrl)
865 {
866 struct nvme_id_ctrl *id;
867 u64 cap;
868 int ret, page_shift;
869
870 ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs);
871 if (ret) {
872 dev_err(ctrl->device, "Reading VS failed (%d)\n", ret);
873 return ret;
874 }
875
876 ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &cap);
877 if (ret) {
878 dev_err(ctrl->device, "Reading CAP failed (%d)\n", ret);
879 return ret;
880 }
881 page_shift = NVME_CAP_MPSMIN(cap) + 12;
882
883 if (ctrl->vs >= NVME_VS(1, 1))
884 ctrl->subsystem = NVME_CAP_NSSRC(cap);
885
886 ret = nvme_identify_ctrl(ctrl, &id);
887 if (ret) {
888 dev_err(ctrl->device, "Identify Controller failed (%d)\n", ret);
889 return -EIO;
890 }
891
892 ctrl->oncs = le16_to_cpup(&id->oncs);
893 atomic_set(&ctrl->abort_limit, id->acl + 1);
894 ctrl->vwc = id->vwc;
895 memcpy(ctrl->serial, id->sn, sizeof(id->sn));
896 memcpy(ctrl->model, id->mn, sizeof(id->mn));
897 memcpy(ctrl->firmware_rev, id->fr, sizeof(id->fr));
898 if (id->mdts)
899 ctrl->max_hw_sectors = 1 << (id->mdts + page_shift - 9);
900 else
901 ctrl->max_hw_sectors = UINT_MAX;
902
903 if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) && id->vs[3]) {
904 unsigned int max_hw_sectors;
905
906 ctrl->stripe_size = 1 << (id->vs[3] + page_shift);
907 max_hw_sectors = ctrl->stripe_size >> (page_shift - 9);
908 if (ctrl->max_hw_sectors) {
909 ctrl->max_hw_sectors = min(max_hw_sectors,
910 ctrl->max_hw_sectors);
911 } else {
912 ctrl->max_hw_sectors = max_hw_sectors;
913 }
914 }
915
916 kfree(id);
917 return 0;
918 }
919
920 static int nvme_dev_open(struct inode *inode, struct file *file)
921 {
922 struct nvme_ctrl *ctrl;
923 int instance = iminor(inode);
924 int ret = -ENODEV;
925
926 spin_lock(&dev_list_lock);
927 list_for_each_entry(ctrl, &nvme_ctrl_list, node) {
928 if (ctrl->instance != instance)
929 continue;
930
931 if (!ctrl->admin_q) {
932 ret = -EWOULDBLOCK;
933 break;
934 }
935 if (!kref_get_unless_zero(&ctrl->kref))
936 break;
937 file->private_data = ctrl;
938 ret = 0;
939 break;
940 }
941 spin_unlock(&dev_list_lock);
942
943 return ret;
944 }
945
946 static int nvme_dev_release(struct inode *inode, struct file *file)
947 {
948 nvme_put_ctrl(file->private_data);
949 return 0;
950 }
951
952 static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
953 {
954 struct nvme_ns *ns;
955 int ret;
956
957 mutex_lock(&ctrl->namespaces_mutex);
958 if (list_empty(&ctrl->namespaces)) {
959 ret = -ENOTTY;
960 goto out_unlock;
961 }
962
963 ns = list_first_entry(&ctrl->namespaces, struct nvme_ns, list);
964 if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) {
965 dev_warn(ctrl->device,
966 "NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n");
967 ret = -EINVAL;
968 goto out_unlock;
969 }
970
971 dev_warn(ctrl->device,
972 "using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n");
973 kref_get(&ns->kref);
974 mutex_unlock(&ctrl->namespaces_mutex);
975
976 ret = nvme_user_cmd(ctrl, ns, argp);
977 nvme_put_ns(ns);
978 return ret;
979
980 out_unlock:
981 mutex_unlock(&ctrl->namespaces_mutex);
982 return ret;
983 }
984
985 static long nvme_dev_ioctl(struct file *file, unsigned int cmd,
986 unsigned long arg)
987 {
988 struct nvme_ctrl *ctrl = file->private_data;
989 void __user *argp = (void __user *)arg;
990
991 switch (cmd) {
992 case NVME_IOCTL_ADMIN_CMD:
993 return nvme_user_cmd(ctrl, NULL, argp);
994 case NVME_IOCTL_IO_CMD:
995 return nvme_dev_user_cmd(ctrl, argp);
996 case NVME_IOCTL_RESET:
997 dev_warn(ctrl->device, "resetting controller\n");
998 return ctrl->ops->reset_ctrl(ctrl);
999 case NVME_IOCTL_SUBSYS_RESET:
1000 return nvme_reset_subsystem(ctrl);
1001 default:
1002 return -ENOTTY;
1003 }
1004 }
1005
1006 static const struct file_operations nvme_dev_fops = {
1007 .owner = THIS_MODULE,
1008 .open = nvme_dev_open,
1009 .release = nvme_dev_release,
1010 .unlocked_ioctl = nvme_dev_ioctl,
1011 .compat_ioctl = nvme_dev_ioctl,
1012 };
1013
1014 static ssize_t nvme_sysfs_reset(struct device *dev,
1015 struct device_attribute *attr, const char *buf,
1016 size_t count)
1017 {
1018 struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
1019 int ret;
1020
1021 ret = ctrl->ops->reset_ctrl(ctrl);
1022 if (ret < 0)
1023 return ret;
1024 return count;
1025 }
1026 static DEVICE_ATTR(reset_controller, S_IWUSR, NULL, nvme_sysfs_reset);
1027
1028 static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
1029 char *buf)
1030 {
1031 struct nvme_ns *ns = dev_to_disk(dev)->private_data;
1032 return sprintf(buf, "%pU\n", ns->uuid);
1033 }
1034 static DEVICE_ATTR(uuid, S_IRUGO, uuid_show, NULL);
1035
1036 static ssize_t eui_show(struct device *dev, struct device_attribute *attr,
1037 char *buf)
1038 {
1039 struct nvme_ns *ns = dev_to_disk(dev)->private_data;
1040 return sprintf(buf, "%8phd\n", ns->eui);
1041 }
1042 static DEVICE_ATTR(eui, S_IRUGO, eui_show, NULL);
1043
1044 static ssize_t nsid_show(struct device *dev, struct device_attribute *attr,
1045 char *buf)
1046 {
1047 struct nvme_ns *ns = dev_to_disk(dev)->private_data;
1048 return sprintf(buf, "%d\n", ns->ns_id);
1049 }
1050 static DEVICE_ATTR(nsid, S_IRUGO, nsid_show, NULL);
1051
1052 static struct attribute *nvme_ns_attrs[] = {
1053 &dev_attr_uuid.attr,
1054 &dev_attr_eui.attr,
1055 &dev_attr_nsid.attr,
1056 NULL,
1057 };
1058
1059 static umode_t nvme_attrs_are_visible(struct kobject *kobj,
1060 struct attribute *a, int n)
1061 {
1062 struct device *dev = container_of(kobj, struct device, kobj);
1063 struct nvme_ns *ns = dev_to_disk(dev)->private_data;
1064
1065 if (a == &dev_attr_uuid.attr) {
1066 if (!memchr_inv(ns->uuid, 0, sizeof(ns->uuid)))
1067 return 0;
1068 }
1069 if (a == &dev_attr_eui.attr) {
1070 if (!memchr_inv(ns->eui, 0, sizeof(ns->eui)))
1071 return 0;
1072 }
1073 return a->mode;
1074 }
1075
1076 static const struct attribute_group nvme_ns_attr_group = {
1077 .attrs = nvme_ns_attrs,
1078 .is_visible = nvme_attrs_are_visible,
1079 };
1080
1081 #define nvme_show_function(field) \
1082 static ssize_t field##_show(struct device *dev, \
1083 struct device_attribute *attr, char *buf) \
1084 { \
1085 struct nvme_ctrl *ctrl = dev_get_drvdata(dev); \
1086 return sprintf(buf, "%.*s\n", (int)sizeof(ctrl->field), ctrl->field); \
1087 } \
1088 static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL);
1089
1090 nvme_show_function(model);
1091 nvme_show_function(serial);
1092 nvme_show_function(firmware_rev);
1093
1094 static struct attribute *nvme_dev_attrs[] = {
1095 &dev_attr_reset_controller.attr,
1096 &dev_attr_model.attr,
1097 &dev_attr_serial.attr,
1098 &dev_attr_firmware_rev.attr,
1099 NULL
1100 };
1101
1102 static struct attribute_group nvme_dev_attrs_group = {
1103 .attrs = nvme_dev_attrs,
1104 };
1105
1106 static const struct attribute_group *nvme_dev_attr_groups[] = {
1107 &nvme_dev_attrs_group,
1108 NULL,
1109 };
1110
1111 static int ns_cmp(void *priv, struct list_head *a, struct list_head *b)
1112 {
1113 struct nvme_ns *nsa = container_of(a, struct nvme_ns, list);
1114 struct nvme_ns *nsb = container_of(b, struct nvme_ns, list);
1115
1116 return nsa->ns_id - nsb->ns_id;
1117 }
1118
1119 static struct nvme_ns *nvme_find_ns(struct nvme_ctrl *ctrl, unsigned nsid)
1120 {
1121 struct nvme_ns *ns;
1122
1123 lockdep_assert_held(&ctrl->namespaces_mutex);
1124
1125 list_for_each_entry(ns, &ctrl->namespaces, list) {
1126 if (ns->ns_id == nsid)
1127 return ns;
1128 if (ns->ns_id > nsid)
1129 break;
1130 }
1131 return NULL;
1132 }
1133
1134 static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
1135 {
1136 struct nvme_ns *ns;
1137 struct gendisk *disk;
1138 int node = dev_to_node(ctrl->dev);
1139
1140 lockdep_assert_held(&ctrl->namespaces_mutex);
1141
1142 ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node);
1143 if (!ns)
1144 return;
1145
1146 ns->queue = blk_mq_init_queue(ctrl->tagset);
1147 if (IS_ERR(ns->queue))
1148 goto out_free_ns;
1149 queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, ns->queue);
1150 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue);
1151 ns->queue->queuedata = ns;
1152 ns->ctrl = ctrl;
1153
1154 disk = alloc_disk_node(0, node);
1155 if (!disk)
1156 goto out_free_queue;
1157
1158 kref_init(&ns->kref);
1159 ns->ns_id = nsid;
1160 ns->disk = disk;
1161 ns->lba_shift = 9; /* default to 512 byte blocks until the disk is validated */
1162
1163 blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
1164 if (ctrl->max_hw_sectors) {
1165 blk_queue_max_hw_sectors(ns->queue, ctrl->max_hw_sectors);
1166 blk_queue_max_segments(ns->queue,
1167 (ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1);
1168 }
1169 if (ctrl->stripe_size)
1170 blk_queue_chunk_sectors(ns->queue, ctrl->stripe_size >> 9);
1171 if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
1172 blk_queue_flush(ns->queue, REQ_FLUSH | REQ_FUA);
1173 blk_queue_virt_boundary(ns->queue, ctrl->page_size - 1);
1174
1175 disk->major = nvme_major;
1176 disk->first_minor = 0;
1177 disk->fops = &nvme_fops;
1178 disk->private_data = ns;
1179 disk->queue = ns->queue;
1180 disk->driverfs_dev = ctrl->device;
1181 disk->flags = GENHD_FL_EXT_DEVT;
1182 sprintf(disk->disk_name, "nvme%dn%d", ctrl->instance, nsid);
1183
1184 if (nvme_revalidate_disk(ns->disk))
1185 goto out_free_disk;
1186
1187 list_add_tail(&ns->list, &ctrl->namespaces);
1188 kref_get(&ctrl->kref);
1189 if (ns->type == NVME_NS_LIGHTNVM)
1190 return;
1191
1192 add_disk(ns->disk);
1193 if (sysfs_create_group(&disk_to_dev(ns->disk)->kobj,
1194 &nvme_ns_attr_group))
1195 pr_warn("%s: failed to create sysfs group for identification\n",
1196 ns->disk->disk_name);
1197 return;
1198 out_free_disk:
1199 kfree(disk);
1200 out_free_queue:
1201 blk_cleanup_queue(ns->queue);
1202 out_free_ns:
1203 kfree(ns);
1204 }
1205
1206 static void nvme_ns_remove(struct nvme_ns *ns)
1207 {
1208 bool kill = nvme_io_incapable(ns->ctrl) &&
1209 !blk_queue_dying(ns->queue);
1210
1211 lockdep_assert_held(&ns->ctrl->namespaces_mutex);
1212
1213 if (kill) {
1214 blk_set_queue_dying(ns->queue);
1215
1216 /*
1217 * The controller was shutdown first if we got here through
1218 * device removal. The shutdown may requeue outstanding
1219 * requests. These need to be aborted immediately so
1220 * del_gendisk doesn't block indefinitely for their completion.
1221 */
1222 blk_mq_abort_requeue_list(ns->queue);
1223 }
1224 if (ns->disk->flags & GENHD_FL_UP) {
1225 if (blk_get_integrity(ns->disk))
1226 blk_integrity_unregister(ns->disk);
1227 sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
1228 &nvme_ns_attr_group);
1229 del_gendisk(ns->disk);
1230 }
1231 if (kill || !blk_queue_dying(ns->queue)) {
1232 blk_mq_abort_requeue_list(ns->queue);
1233 blk_cleanup_queue(ns->queue);
1234 }
1235 list_del_init(&ns->list);
1236 nvme_put_ns(ns);
1237 }
1238
1239 static void nvme_validate_ns(struct nvme_ctrl *ctrl, unsigned nsid)
1240 {
1241 struct nvme_ns *ns;
1242
1243 ns = nvme_find_ns(ctrl, nsid);
1244 if (ns) {
1245 if (revalidate_disk(ns->disk))
1246 nvme_ns_remove(ns);
1247 } else
1248 nvme_alloc_ns(ctrl, nsid);
1249 }
1250
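/*
 * Scan namespaces using the Identify Namespace List (CNS 2), 1024 entries
 * at a time. Namespace IDs that fall into the gaps between reported entries
 * are removed.
 */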
1251 static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn)
1252 {
1253 struct nvme_ns *ns;
1254 __le32 *ns_list;
1255 unsigned i, j, nsid, prev = 0, num_lists = DIV_ROUND_UP(nn, 1024);
1256 int ret = 0;
1257
1258 ns_list = kzalloc(0x1000, GFP_KERNEL);
1259 if (!ns_list)
1260 return -ENOMEM;
1261
1262 for (i = 0; i < num_lists; i++) {
1263 ret = nvme_identify_ns_list(ctrl, prev, ns_list);
1264 if (ret)
1265 goto out;
1266
1267 for (j = 0; j < min(nn, 1024U); j++) {
1268 nsid = le32_to_cpu(ns_list[j]);
1269 if (!nsid)
1270 goto out;
1271
1272 nvme_validate_ns(ctrl, nsid);
1273
1274 while (++prev < nsid) {
1275 ns = nvme_find_ns(ctrl, prev);
1276 if (ns)
1277 nvme_ns_remove(ns);
1278 }
1279 }
1280 nn -= j;
1281 }
1282 out:
1283 kfree(ns_list);
1284 return ret;
1285 }
1286
1287 static void __nvme_scan_namespaces(struct nvme_ctrl *ctrl, unsigned nn)
1288 {
1289 struct nvme_ns *ns, *next;
1290 unsigned i;
1291
1292 lockdep_assert_held(&ctrl->namespaces_mutex);
1293
1294 for (i = 1; i <= nn; i++)
1295 nvme_validate_ns(ctrl, i);
1296
1297 list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) {
1298 if (ns->ns_id > nn)
1299 nvme_ns_remove(ns);
1300 }
1301 }
1302
1303 void nvme_scan_namespaces(struct nvme_ctrl *ctrl)
1304 {
1305 struct nvme_id_ctrl *id;
1306 unsigned nn;
1307
1308 if (nvme_identify_ctrl(ctrl, &id))
1309 return;
1310
1311 mutex_lock(&ctrl->namespaces_mutex);
1312 nn = le32_to_cpu(id->nn);
1313 if (ctrl->vs >= NVME_VS(1, 1) &&
1314 !(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)) {
1315 if (!nvme_scan_ns_list(ctrl, nn))
1316 goto done;
1317 }
1318 __nvme_scan_namespaces(ctrl, le32_to_cpup(&id->nn));
1319 done:
1320 list_sort(NULL, &ctrl->namespaces, ns_cmp);
1321 mutex_unlock(&ctrl->namespaces_mutex);
1322 kfree(id);
1323 }
1324
1325 void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
1326 {
1327 struct nvme_ns *ns, *next;
1328
1329 mutex_lock(&ctrl->namespaces_mutex);
1330 list_for_each_entry_safe(ns, next, &ctrl->namespaces, list)
1331 nvme_ns_remove(ns);
1332 mutex_unlock(&ctrl->namespaces_mutex);
1333 }
1334
1335 static DEFINE_IDA(nvme_instance_ida);
1336
1337 static int nvme_set_instance(struct nvme_ctrl *ctrl)
1338 {
1339 int instance, error;
1340
1341 do {
1342 if (!ida_pre_get(&nvme_instance_ida, GFP_KERNEL))
1343 return -ENODEV;
1344
1345 spin_lock(&dev_list_lock);
1346 error = ida_get_new(&nvme_instance_ida, &instance);
1347 spin_unlock(&dev_list_lock);
1348 } while (error == -EAGAIN);
1349
1350 if (error)
1351 return -ENODEV;
1352
1353 ctrl->instance = instance;
1354 return 0;
1355 }
1356
1357 static void nvme_release_instance(struct nvme_ctrl *ctrl)
1358 {
1359 spin_lock(&dev_list_lock);
1360 ida_remove(&nvme_instance_ida, ctrl->instance);
1361 spin_unlock(&dev_list_lock);
1362 }
1363
1364 void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
1365 {
1366 device_destroy(nvme_class, MKDEV(nvme_char_major, ctrl->instance));
1367
1368 spin_lock(&dev_list_lock);
1369 list_del(&ctrl->node);
1370 spin_unlock(&dev_list_lock);
1371 }
1372
1373 static void nvme_free_ctrl(struct kref *kref)
1374 {
1375 struct nvme_ctrl *ctrl = container_of(kref, struct nvme_ctrl, kref);
1376
1377 put_device(ctrl->device);
1378 nvme_release_instance(ctrl);
1379
1380 ctrl->ops->free_ctrl(ctrl);
1381 }
1382
1383 void nvme_put_ctrl(struct nvme_ctrl *ctrl)
1384 {
1385 kref_put(&ctrl->kref, nvme_free_ctrl);
1386 }
1387
1388 /*
1389 * Initialize an NVMe controller structure. This needs to be called during
1390 * earliest initialization so that we have the initialized structure around
1391 * during probing.
1392 */
1393 int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
1394 const struct nvme_ctrl_ops *ops, unsigned long quirks)
1395 {
1396 int ret;
1397
1398 INIT_LIST_HEAD(&ctrl->namespaces);
1399 mutex_init(&ctrl->namespaces_mutex);
1400 kref_init(&ctrl->kref);
1401 ctrl->dev = dev;
1402 ctrl->ops = ops;
1403 ctrl->quirks = quirks;
1404
1405 ret = nvme_set_instance(ctrl);
1406 if (ret)
1407 goto out;
1408
1409 ctrl->device = device_create_with_groups(nvme_class, ctrl->dev,
1410 MKDEV(nvme_char_major, ctrl->instance),
1411 ctrl, nvme_dev_attr_groups,
1412 "nvme%d", ctrl->instance);
1413 if (IS_ERR(ctrl->device)) {
1414 ret = PTR_ERR(ctrl->device);
1415 goto out_release_instance;
1416 }
1417 get_device(ctrl->device);
1418
1419 spin_lock(&dev_list_lock);
1420 list_add_tail(&ctrl->node, &nvme_ctrl_list);
1421 spin_unlock(&dev_list_lock);
1422
1423 return 0;
1424 out_release_instance:
1425 nvme_release_instance(ctrl);
1426 out:
1427 return ret;
1428 }
1429
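/*
 * Mark every namespace request queue stopped, cancel pending requeue work
 * and quiesce the hardware queues so no new commands reach the device
 * (typically while the controller is reset or shut down).
 */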
1430 void nvme_stop_queues(struct nvme_ctrl *ctrl)
1431 {
1432 struct nvme_ns *ns;
1433
1434 mutex_lock(&ctrl->namespaces_mutex);
1435 list_for_each_entry(ns, &ctrl->namespaces, list) {
1436 spin_lock_irq(ns->queue->queue_lock);
1437 queue_flag_set(QUEUE_FLAG_STOPPED, ns->queue);
1438 spin_unlock_irq(ns->queue->queue_lock);
1439
1440 blk_mq_cancel_requeue_work(ns->queue);
1441 blk_mq_stop_hw_queues(ns->queue);
1442 }
1443 mutex_unlock(&ctrl->namespaces_mutex);
1444 }
1445
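/*
 * Counterpart to nvme_stop_queues(): clear the stopped flag, restart the
 * stopped hardware queues, and kick the requeue list on every namespace.
 */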
1446 void nvme_start_queues(struct nvme_ctrl *ctrl)
1447 {
1448 struct nvme_ns *ns;
1449
1450 mutex_lock(&ctrl->namespaces_mutex);
1451 list_for_each_entry(ns, &ctrl->namespaces, list) {
1452 queue_flag_clear_unlocked(QUEUE_FLAG_STOPPED, ns->queue);
1453 blk_mq_start_stopped_hw_queues(ns->queue, true);
1454 blk_mq_kick_requeue_list(ns->queue);
1455 }
1456 mutex_unlock(&ctrl->namespaces_mutex);
1457 }
1458
1459 int __init nvme_core_init(void)
1460 {
1461 int result;
1462
1463 result = register_blkdev(nvme_major, "nvme");
1464 if (result < 0)
1465 return result;
1466 else if (result > 0)
1467 nvme_major = result;
1468
1469 result = __register_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme",
1470 &nvme_dev_fops);
1471 if (result < 0)
1472 goto unregister_blkdev;
1473 else if (result > 0)
1474 nvme_char_major = result;
1475
1476 nvme_class = class_create(THIS_MODULE, "nvme");
1477 if (IS_ERR(nvme_class)) {
1478 result = PTR_ERR(nvme_class);
1479 goto unregister_chrdev;
1480 }
1481
1482 return 0;
1483
1484 unregister_chrdev:
1485 __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
1486 unregister_blkdev:
1487 unregister_blkdev(nvme_major, "nvme");
1488 return result;
1489 }
1490
1491 void nvme_core_exit(void)
1492 {
1493 unregister_blkdev(nvme_major, "nvme");
1494 class_destroy(nvme_class);
1495 __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
1496 }