]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blame - drivers/nvme/host/core.c
nvme: move timeout variables to core.c
[mirror_ubuntu-bionic-kernel.git] / drivers / nvme / host / core.c
CommitLineData
21d34711
CH
1/*
2 * NVM Express device driver
3 * Copyright (c) 2011-2014, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 */
14
15#include <linux/blkdev.h>
16#include <linux/blk-mq.h>
5fd4ce1b 17#include <linux/delay.h>
21d34711 18#include <linux/errno.h>
1673f1f0 19#include <linux/hdreg.h>
21d34711 20#include <linux/kernel.h>
5bae7f73
CH
21#include <linux/module.h>
22#include <linux/list_sort.h>
21d34711
CH
23#include <linux/slab.h>
24#include <linux/types.h>
1673f1f0
CH
25#include <linux/pr.h>
26#include <linux/ptrace.h>
27#include <linux/nvme_ioctl.h>
28#include <linux/t10-pi.h>
29#include <scsi/sg.h>
30#include <asm/unaligned.h>
21d34711
CH
31
32#include "nvme.h"
33
f3ca80fc
CH
34#define NVME_MINORS (1U << MINORBITS)
35
ba0ba7d3
ML
36unsigned char admin_timeout = 60;
37module_param(admin_timeout, byte, 0644);
38MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands");
39
40unsigned char nvme_io_timeout = 30;
41module_param_named(io_timeout, nvme_io_timeout, byte, 0644);
42MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O");
43
44unsigned char shutdown_timeout = 5;
45module_param(shutdown_timeout, byte, 0644);
46MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown");
47
5bae7f73
CH
48static int nvme_major;
49module_param(nvme_major, int, 0);
50
f3ca80fc
CH
51static int nvme_char_major;
52module_param(nvme_char_major, int, 0);
53
54static LIST_HEAD(nvme_ctrl_list);
1673f1f0
CH
55DEFINE_SPINLOCK(dev_list_lock);
56
f3ca80fc
CH
57static struct class *nvme_class;
58
1673f1f0
CH
59static void nvme_free_ns(struct kref *kref)
60{
61 struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref);
62
63 if (ns->type == NVME_NS_LIGHTNVM)
64 nvme_nvm_unregister(ns->queue, ns->disk->disk_name);
65
66 spin_lock(&dev_list_lock);
67 ns->disk->private_data = NULL;
68 spin_unlock(&dev_list_lock);
69
70 nvme_put_ctrl(ns->ctrl);
71 put_disk(ns->disk);
72 kfree(ns);
73}
74
5bae7f73 75static void nvme_put_ns(struct nvme_ns *ns)
1673f1f0
CH
76{
77 kref_put(&ns->kref, nvme_free_ns);
78}
79
80static struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk)
81{
82 struct nvme_ns *ns;
83
84 spin_lock(&dev_list_lock);
85 ns = disk->private_data;
e439bb12
SG
86 if (ns) {
87 if (!kref_get_unless_zero(&ns->kref))
88 goto fail;
89 if (!try_module_get(ns->ctrl->ops->module))
90 goto fail_put_ns;
91 }
1673f1f0
CH
92 spin_unlock(&dev_list_lock);
93
94 return ns;
e439bb12
SG
95
96fail_put_ns:
97 kref_put(&ns->kref, nvme_free_ns);
98fail:
99 spin_unlock(&dev_list_lock);
100 return NULL;
1673f1f0
CH
101}
102
7688faa6
CH
103void nvme_requeue_req(struct request *req)
104{
105 unsigned long flags;
106
107 blk_mq_requeue_request(req);
108 spin_lock_irqsave(req->q->queue_lock, flags);
109 if (!blk_queue_stopped(req->q))
110 blk_mq_kick_requeue_list(req->q);
111 spin_unlock_irqrestore(req->q->queue_lock, flags);
112}
113
4160982e
CH
114struct request *nvme_alloc_request(struct request_queue *q,
115 struct nvme_command *cmd, unsigned int flags)
21d34711
CH
116{
117 bool write = cmd->common.opcode & 1;
21d34711 118 struct request *req;
21d34711 119
4160982e 120 req = blk_mq_alloc_request(q, write, flags);
21d34711 121 if (IS_ERR(req))
4160982e 122 return req;
21d34711
CH
123
124 req->cmd_type = REQ_TYPE_DRV_PRIV;
125 req->cmd_flags |= REQ_FAILFAST_DRIVER;
126 req->__data_len = 0;
127 req->__sector = (sector_t) -1;
128 req->bio = req->biotail = NULL;
129
21d34711
CH
130 req->cmd = (unsigned char *)cmd;
131 req->cmd_len = sizeof(struct nvme_command);
132 req->special = (void *)0;
133
4160982e
CH
134 return req;
135}
136
137/*
138 * Returns 0 on success. If the result is negative, it's a Linux error code;
139 * if the result is positive, it's an NVM Express status code
140 */
141int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
142 void *buffer, unsigned bufflen, u32 *result, unsigned timeout)
143{
144 struct request *req;
145 int ret;
146
147 req = nvme_alloc_request(q, cmd, 0);
148 if (IS_ERR(req))
149 return PTR_ERR(req);
150
151 req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
152
21d34711
CH
153 if (buffer && bufflen) {
154 ret = blk_rq_map_kern(q, req, buffer, bufflen, GFP_KERNEL);
155 if (ret)
156 goto out;
4160982e
CH
157 }
158
159 blk_execute_rq(req->q, NULL, req, 0);
160 if (result)
161 *result = (u32)(uintptr_t)req->special;
162 ret = req->errors;
163 out:
164 blk_mq_free_request(req);
165 return ret;
166}
167
168int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
169 void *buffer, unsigned bufflen)
170{
171 return __nvme_submit_sync_cmd(q, cmd, buffer, bufflen, NULL, 0);
172}
173
0b7f1f26
KB
174int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
175 void __user *ubuffer, unsigned bufflen,
176 void __user *meta_buffer, unsigned meta_len, u32 meta_seed,
177 u32 *result, unsigned timeout)
4160982e 178{
0b7f1f26
KB
179 bool write = cmd->common.opcode & 1;
180 struct nvme_ns *ns = q->queuedata;
181 struct gendisk *disk = ns ? ns->disk : NULL;
4160982e 182 struct request *req;
0b7f1f26
KB
183 struct bio *bio = NULL;
184 void *meta = NULL;
4160982e
CH
185 int ret;
186
187 req = nvme_alloc_request(q, cmd, 0);
188 if (IS_ERR(req))
189 return PTR_ERR(req);
190
191 req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
192
193 if (ubuffer && bufflen) {
21d34711
CH
194 ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen,
195 GFP_KERNEL);
196 if (ret)
197 goto out;
198 bio = req->bio;
21d34711 199
0b7f1f26
KB
200 if (!disk)
201 goto submit;
202 bio->bi_bdev = bdget_disk(disk, 0);
203 if (!bio->bi_bdev) {
204 ret = -ENODEV;
205 goto out_unmap;
206 }
207
208 if (meta_buffer) {
209 struct bio_integrity_payload *bip;
210
211 meta = kmalloc(meta_len, GFP_KERNEL);
212 if (!meta) {
213 ret = -ENOMEM;
214 goto out_unmap;
215 }
216
217 if (write) {
218 if (copy_from_user(meta, meta_buffer,
219 meta_len)) {
220 ret = -EFAULT;
221 goto out_free_meta;
222 }
223 }
224
225 bip = bio_integrity_alloc(bio, GFP_KERNEL, 1);
06c1e390
KB
226 if (IS_ERR(bip)) {
227 ret = PTR_ERR(bip);
0b7f1f26
KB
228 goto out_free_meta;
229 }
230
231 bip->bip_iter.bi_size = meta_len;
232 bip->bip_iter.bi_sector = meta_seed;
233
234 ret = bio_integrity_add_page(bio, virt_to_page(meta),
235 meta_len, offset_in_page(meta));
236 if (ret != meta_len) {
237 ret = -ENOMEM;
238 goto out_free_meta;
239 }
240 }
241 }
242 submit:
243 blk_execute_rq(req->q, disk, req, 0);
244 ret = req->errors;
21d34711
CH
245 if (result)
246 *result = (u32)(uintptr_t)req->special;
0b7f1f26
KB
247 if (meta && !ret && !write) {
248 if (copy_to_user(meta_buffer, meta, meta_len))
249 ret = -EFAULT;
250 }
251 out_free_meta:
252 kfree(meta);
253 out_unmap:
254 if (bio) {
255 if (disk && bio->bi_bdev)
256 bdput(bio->bi_bdev);
257 blk_rq_unmap_user(bio);
258 }
21d34711
CH
259 out:
260 blk_mq_free_request(req);
261 return ret;
262}
263
0b7f1f26
KB
264int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
265 void __user *ubuffer, unsigned bufflen, u32 *result,
266 unsigned timeout)
267{
268 return __nvme_submit_user_cmd(q, cmd, ubuffer, bufflen, NULL, 0, 0,
269 result, timeout);
270}
271
1c63dc66 272int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
21d34711
CH
273{
274 struct nvme_command c = { };
275 int error;
276
277 /* gcc-4.4.4 (at least) has issues with initializers and anon unions */
278 c.identify.opcode = nvme_admin_identify;
279 c.identify.cns = cpu_to_le32(1);
280
281 *id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL);
282 if (!*id)
283 return -ENOMEM;
284
285 error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
286 sizeof(struct nvme_id_ctrl));
287 if (error)
288 kfree(*id);
289 return error;
290}
291
540c801c
KB
292static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *ns_list)
293{
294 struct nvme_command c = { };
295
296 c.identify.opcode = nvme_admin_identify;
297 c.identify.cns = cpu_to_le32(2);
298 c.identify.nsid = cpu_to_le32(nsid);
299 return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000);
300}
301
1c63dc66 302int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
21d34711
CH
303 struct nvme_id_ns **id)
304{
305 struct nvme_command c = { };
306 int error;
307
308 /* gcc-4.4.4 (at least) has issues with initializers and anon unions */
309 c.identify.opcode = nvme_admin_identify,
310 c.identify.nsid = cpu_to_le32(nsid),
311
312 *id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL);
313 if (!*id)
314 return -ENOMEM;
315
316 error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
317 sizeof(struct nvme_id_ns));
318 if (error)
319 kfree(*id);
320 return error;
321}
322
1c63dc66 323int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
21d34711
CH
324 dma_addr_t dma_addr, u32 *result)
325{
326 struct nvme_command c;
327
328 memset(&c, 0, sizeof(c));
329 c.features.opcode = nvme_admin_get_features;
330 c.features.nsid = cpu_to_le32(nsid);
331 c.features.prp1 = cpu_to_le64(dma_addr);
332 c.features.fid = cpu_to_le32(fid);
333
4160982e 334 return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0, result, 0);
21d34711
CH
335}
336
1c63dc66 337int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
21d34711
CH
338 dma_addr_t dma_addr, u32 *result)
339{
340 struct nvme_command c;
341
342 memset(&c, 0, sizeof(c));
343 c.features.opcode = nvme_admin_set_features;
344 c.features.prp1 = cpu_to_le64(dma_addr);
345 c.features.fid = cpu_to_le32(fid);
346 c.features.dword11 = cpu_to_le32(dword11);
347
4160982e 348 return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0, result, 0);
21d34711
CH
349}
350
1c63dc66 351int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log)
21d34711
CH
352{
353 struct nvme_command c = { };
354 int error;
355
356 c.common.opcode = nvme_admin_get_log_page,
357 c.common.nsid = cpu_to_le32(0xFFFFFFFF),
358 c.common.cdw10[0] = cpu_to_le32(
359 (((sizeof(struct nvme_smart_log) / 4) - 1) << 16) |
360 NVME_LOG_SMART),
361
362 *log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL);
363 if (!*log)
364 return -ENOMEM;
365
366 error = nvme_submit_sync_cmd(dev->admin_q, &c, *log,
367 sizeof(struct nvme_smart_log));
368 if (error)
369 kfree(*log);
370 return error;
371}
1673f1f0 372
9a0be7ab
CH
373int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
374{
375 u32 q_count = (*count - 1) | ((*count - 1) << 16);
376 u32 result;
377 int status, nr_io_queues;
378
379 status = nvme_set_features(ctrl, NVME_FEAT_NUM_QUEUES, q_count, 0,
380 &result);
381 if (status)
382 return status;
383
384 nr_io_queues = min(result & 0xffff, result >> 16) + 1;
385 *count = min(*count, nr_io_queues);
386 return 0;
387}
388
1673f1f0
CH
389static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
390{
391 struct nvme_user_io io;
392 struct nvme_command c;
393 unsigned length, meta_len;
394 void __user *metadata;
395
396 if (copy_from_user(&io, uio, sizeof(io)))
397 return -EFAULT;
398
399 switch (io.opcode) {
400 case nvme_cmd_write:
401 case nvme_cmd_read:
402 case nvme_cmd_compare:
403 break;
404 default:
405 return -EINVAL;
406 }
407
408 length = (io.nblocks + 1) << ns->lba_shift;
409 meta_len = (io.nblocks + 1) * ns->ms;
410 metadata = (void __user *)(uintptr_t)io.metadata;
411
412 if (ns->ext) {
413 length += meta_len;
414 meta_len = 0;
415 } else if (meta_len) {
416 if ((io.metadata & 3) || !io.metadata)
417 return -EINVAL;
418 }
419
420 memset(&c, 0, sizeof(c));
421 c.rw.opcode = io.opcode;
422 c.rw.flags = io.flags;
423 c.rw.nsid = cpu_to_le32(ns->ns_id);
424 c.rw.slba = cpu_to_le64(io.slba);
425 c.rw.length = cpu_to_le16(io.nblocks);
426 c.rw.control = cpu_to_le16(io.control);
427 c.rw.dsmgmt = cpu_to_le32(io.dsmgmt);
428 c.rw.reftag = cpu_to_le32(io.reftag);
429 c.rw.apptag = cpu_to_le16(io.apptag);
430 c.rw.appmask = cpu_to_le16(io.appmask);
431
432 return __nvme_submit_user_cmd(ns->queue, &c,
433 (void __user *)(uintptr_t)io.addr, length,
434 metadata, meta_len, io.slba, NULL, 0);
435}
436
f3ca80fc 437static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
1673f1f0
CH
438 struct nvme_passthru_cmd __user *ucmd)
439{
440 struct nvme_passthru_cmd cmd;
441 struct nvme_command c;
442 unsigned timeout = 0;
443 int status;
444
445 if (!capable(CAP_SYS_ADMIN))
446 return -EACCES;
447 if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
448 return -EFAULT;
449
450 memset(&c, 0, sizeof(c));
451 c.common.opcode = cmd.opcode;
452 c.common.flags = cmd.flags;
453 c.common.nsid = cpu_to_le32(cmd.nsid);
454 c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
455 c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
456 c.common.cdw10[0] = cpu_to_le32(cmd.cdw10);
457 c.common.cdw10[1] = cpu_to_le32(cmd.cdw11);
458 c.common.cdw10[2] = cpu_to_le32(cmd.cdw12);
459 c.common.cdw10[3] = cpu_to_le32(cmd.cdw13);
460 c.common.cdw10[4] = cpu_to_le32(cmd.cdw14);
461 c.common.cdw10[5] = cpu_to_le32(cmd.cdw15);
462
463 if (cmd.timeout_ms)
464 timeout = msecs_to_jiffies(cmd.timeout_ms);
465
466 status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
d1ea7be5 467 (void __user *)(uintptr_t)cmd.addr, cmd.data_len,
1673f1f0
CH
468 &cmd.result, timeout);
469 if (status >= 0) {
470 if (put_user(cmd.result, &ucmd->result))
471 return -EFAULT;
472 }
473
474 return status;
475}
476
477static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
478 unsigned int cmd, unsigned long arg)
479{
480 struct nvme_ns *ns = bdev->bd_disk->private_data;
481
482 switch (cmd) {
483 case NVME_IOCTL_ID:
484 force_successful_syscall_return();
485 return ns->ns_id;
486 case NVME_IOCTL_ADMIN_CMD:
487 return nvme_user_cmd(ns->ctrl, NULL, (void __user *)arg);
488 case NVME_IOCTL_IO_CMD:
489 return nvme_user_cmd(ns->ctrl, ns, (void __user *)arg);
490 case NVME_IOCTL_SUBMIT_IO:
491 return nvme_submit_io(ns, (void __user *)arg);
44907332 492#ifdef CONFIG_BLK_DEV_NVME_SCSI
1673f1f0
CH
493 case SG_GET_VERSION_NUM:
494 return nvme_sg_get_version_num((void __user *)arg);
495 case SG_IO:
496 return nvme_sg_io(ns, (void __user *)arg);
44907332 497#endif
1673f1f0
CH
498 default:
499 return -ENOTTY;
500 }
501}
502
#ifdef CONFIG_COMPAT
/* 32-bit compat path: SG_IO needs translation we don't provide here. */
static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode,
			unsigned int cmd, unsigned long arg)
{
	switch (cmd) {
	case SG_IO:
		return -ENOIOCTLCMD;
	}
	return nvme_ioctl(bdev, mode, cmd, arg);
}
#else
#define nvme_compat_ioctl	NULL
#endif
516
517static int nvme_open(struct block_device *bdev, fmode_t mode)
518{
519 return nvme_get_ns_from_disk(bdev->bd_disk) ? 0 : -ENXIO;
520}
521
522static void nvme_release(struct gendisk *disk, fmode_t mode)
523{
e439bb12
SG
524 struct nvme_ns *ns = disk->private_data;
525
526 module_put(ns->ctrl->ops->module);
527 nvme_put_ns(ns);
1673f1f0
CH
528}
529
530static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
531{
532 /* some standard values */
533 geo->heads = 1 << 6;
534 geo->sectors = 1 << 5;
535 geo->cylinders = get_capacity(bdev->bd_disk) >> 11;
536 return 0;
537}
538
#ifdef CONFIG_BLK_DEV_INTEGRITY
/*
 * Register a T10-PI integrity profile for the namespace, chosen from its
 * protection-information type.  Metadata size becomes the tuple size.
 */
static void nvme_init_integrity(struct nvme_ns *ns)
{
	struct blk_integrity integrity;

	/*
	 * Zero the whole template first.  Only .profile and .tuple_size are
	 * assigned below; blk_integrity_register() copies the template, so
	 * any field left unset (NOTE(review): e.g. flags/tag_size — confirm
	 * against this kernel's blk_integrity_register) would otherwise be
	 * uninitialized stack garbage.
	 */
	memset(&integrity, 0, sizeof(integrity));

	switch (ns->pi_type) {
	case NVME_NS_DPS_PI_TYPE3:
		integrity.profile = &t10_pi_type3_crc;
		break;
	case NVME_NS_DPS_PI_TYPE1:
	case NVME_NS_DPS_PI_TYPE2:
		integrity.profile = &t10_pi_type1_crc;
		break;
	default:
		integrity.profile = NULL;
		break;
	}
	integrity.tuple_size = ns->ms;
	blk_integrity_register(ns->disk, &integrity);
	blk_queue_max_integrity_segments(ns->queue, 1);
}
#else
/* No-op when block-layer integrity support is compiled out. */
static void nvme_init_integrity(struct nvme_ns *ns)
{
}
#endif /* CONFIG_BLK_DEV_INTEGRITY */
565
566static void nvme_config_discard(struct nvme_ns *ns)
567{
568 u32 logical_block_size = queue_logical_block_size(ns->queue);
569 ns->queue->limits.discard_zeroes_data = 0;
570 ns->queue->limits.discard_alignment = logical_block_size;
571 ns->queue->limits.discard_granularity = logical_block_size;
572 blk_queue_max_discard_sectors(ns->queue, 0xffffffff);
573 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue);
574}
575
5bae7f73 576static int nvme_revalidate_disk(struct gendisk *disk)
1673f1f0
CH
577{
578 struct nvme_ns *ns = disk->private_data;
579 struct nvme_id_ns *id;
580 u8 lbaf, pi_type;
581 u16 old_ms;
582 unsigned short bs;
583
584 if (nvme_identify_ns(ns->ctrl, ns->ns_id, &id)) {
1b3c47c1
SG
585 dev_warn(disk_to_dev(ns->disk), "%s: Identify failure\n",
586 __func__);
1673f1f0
CH
587 return -ENODEV;
588 }
589 if (id->ncap == 0) {
590 kfree(id);
591 return -ENODEV;
592 }
593
594 if (nvme_nvm_ns_supported(ns, id) && ns->type != NVME_NS_LIGHTNVM) {
595 if (nvme_nvm_register(ns->queue, disk->disk_name)) {
1b3c47c1 596 dev_warn(disk_to_dev(ns->disk),
1673f1f0
CH
597 "%s: LightNVM init failure\n", __func__);
598 kfree(id);
599 return -ENODEV;
600 }
601 ns->type = NVME_NS_LIGHTNVM;
602 }
603
2b9b6e86
KB
604 if (ns->ctrl->vs >= NVME_VS(1, 1))
605 memcpy(ns->eui, id->eui64, sizeof(ns->eui));
606 if (ns->ctrl->vs >= NVME_VS(1, 2))
607 memcpy(ns->uuid, id->nguid, sizeof(ns->uuid));
608
1673f1f0
CH
609 old_ms = ns->ms;
610 lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
611 ns->lba_shift = id->lbaf[lbaf].ds;
612 ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
613 ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);
614
615 /*
616 * If identify namespace failed, use default 512 byte block size so
617 * block layer can use before failing read/write for 0 capacity.
618 */
619 if (ns->lba_shift == 0)
620 ns->lba_shift = 9;
621 bs = 1 << ns->lba_shift;
1673f1f0
CH
622 /* XXX: PI implementation requires metadata equal t10 pi tuple size */
623 pi_type = ns->ms == sizeof(struct t10_pi_tuple) ?
624 id->dps & NVME_NS_DPS_PI_MASK : 0;
625
626 blk_mq_freeze_queue(disk->queue);
627 if (blk_get_integrity(disk) && (ns->pi_type != pi_type ||
628 ns->ms != old_ms ||
629 bs != queue_logical_block_size(disk->queue) ||
630 (ns->ms && ns->ext)))
631 blk_integrity_unregister(disk);
632
633 ns->pi_type = pi_type;
634 blk_queue_logical_block_size(ns->queue, bs);
635
4b9d5b15 636 if (ns->ms && !blk_get_integrity(disk) && !ns->ext)
1673f1f0 637 nvme_init_integrity(ns);
1673f1f0
CH
638 if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk))
639 set_capacity(disk, 0);
640 else
641 set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
642
643 if (ns->ctrl->oncs & NVME_CTRL_ONCS_DSM)
644 nvme_config_discard(ns);
645 blk_mq_unfreeze_queue(disk->queue);
646
647 kfree(id);
648 return 0;
649}
650
651static char nvme_pr_type(enum pr_type type)
652{
653 switch (type) {
654 case PR_WRITE_EXCLUSIVE:
655 return 1;
656 case PR_EXCLUSIVE_ACCESS:
657 return 2;
658 case PR_WRITE_EXCLUSIVE_REG_ONLY:
659 return 3;
660 case PR_EXCLUSIVE_ACCESS_REG_ONLY:
661 return 4;
662 case PR_WRITE_EXCLUSIVE_ALL_REGS:
663 return 5;
664 case PR_EXCLUSIVE_ACCESS_ALL_REGS:
665 return 6;
666 default:
667 return 0;
668 }
669};
670
671static int nvme_pr_command(struct block_device *bdev, u32 cdw10,
672 u64 key, u64 sa_key, u8 op)
673{
674 struct nvme_ns *ns = bdev->bd_disk->private_data;
675 struct nvme_command c;
676 u8 data[16] = { 0, };
677
678 put_unaligned_le64(key, &data[0]);
679 put_unaligned_le64(sa_key, &data[8]);
680
681 memset(&c, 0, sizeof(c));
682 c.common.opcode = op;
683 c.common.nsid = cpu_to_le32(ns->ns_id);
684 c.common.cdw10[0] = cpu_to_le32(cdw10);
685
686 return nvme_submit_sync_cmd(ns->queue, &c, data, 16);
687}
688
689static int nvme_pr_register(struct block_device *bdev, u64 old,
690 u64 new, unsigned flags)
691{
692 u32 cdw10;
693
694 if (flags & ~PR_FL_IGNORE_KEY)
695 return -EOPNOTSUPP;
696
697 cdw10 = old ? 2 : 0;
698 cdw10 |= (flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0;
699 cdw10 |= (1 << 30) | (1 << 31); /* PTPL=1 */
700 return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_register);
701}
702
703static int nvme_pr_reserve(struct block_device *bdev, u64 key,
704 enum pr_type type, unsigned flags)
705{
706 u32 cdw10;
707
708 if (flags & ~PR_FL_IGNORE_KEY)
709 return -EOPNOTSUPP;
710
711 cdw10 = nvme_pr_type(type) << 8;
712 cdw10 |= ((flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0);
713 return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_acquire);
714}
715
716static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new,
717 enum pr_type type, bool abort)
718{
719 u32 cdw10 = nvme_pr_type(type) << 8 | abort ? 2 : 1;
720 return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire);
721}
722
723static int nvme_pr_clear(struct block_device *bdev, u64 key)
724{
8c0b3915 725 u32 cdw10 = 1 | (key ? 1 << 3 : 0);
1673f1f0
CH
726 return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_register);
727}
728
729static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
730{
731 u32 cdw10 = nvme_pr_type(type) << 8 | key ? 1 << 3 : 0;
732 return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release);
733}
734
735static const struct pr_ops nvme_pr_ops = {
736 .pr_register = nvme_pr_register,
737 .pr_reserve = nvme_pr_reserve,
738 .pr_release = nvme_pr_release,
739 .pr_preempt = nvme_pr_preempt,
740 .pr_clear = nvme_pr_clear,
741};
742
5bae7f73 743static const struct block_device_operations nvme_fops = {
1673f1f0
CH
744 .owner = THIS_MODULE,
745 .ioctl = nvme_ioctl,
746 .compat_ioctl = nvme_compat_ioctl,
747 .open = nvme_open,
748 .release = nvme_release,
749 .getgeo = nvme_getgeo,
750 .revalidate_disk= nvme_revalidate_disk,
751 .pr_ops = &nvme_pr_ops,
752};
753
5fd4ce1b
CH
754static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled)
755{
756 unsigned long timeout =
757 ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies;
758 u32 csts, bit = enabled ? NVME_CSTS_RDY : 0;
759 int ret;
760
761 while ((ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts)) == 0) {
762 if ((csts & NVME_CSTS_RDY) == bit)
763 break;
764
765 msleep(100);
766 if (fatal_signal_pending(current))
767 return -EINTR;
768 if (time_after(jiffies, timeout)) {
1b3c47c1 769 dev_err(ctrl->device,
5fd4ce1b
CH
770 "Device not ready; aborting %s\n", enabled ?
771 "initialisation" : "reset");
772 return -ENODEV;
773 }
774 }
775
776 return ret;
777}
778
779/*
780 * If the device has been passed off to us in an enabled state, just clear
781 * the enabled bit. The spec says we should set the 'shutdown notification
782 * bits', but doing so may cause the device to complete commands to the
783 * admin queue ... and we don't know what memory that might be pointing at!
784 */
785int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
786{
787 int ret;
788
789 ctrl->ctrl_config &= ~NVME_CC_SHN_MASK;
790 ctrl->ctrl_config &= ~NVME_CC_ENABLE;
791
792 ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
793 if (ret)
794 return ret;
795 return nvme_wait_ready(ctrl, cap, false);
796}
797
798int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
799{
800 /*
801 * Default to a 4K page size, with the intention to update this
802 * path in the future to accomodate architectures with differing
803 * kernel and IO page sizes.
804 */
805 unsigned dev_page_min = NVME_CAP_MPSMIN(cap) + 12, page_shift = 12;
806 int ret;
807
808 if (page_shift < dev_page_min) {
1b3c47c1 809 dev_err(ctrl->device,
5fd4ce1b
CH
810 "Minimum device page size %u too large for host (%u)\n",
811 1 << dev_page_min, 1 << page_shift);
812 return -ENODEV;
813 }
814
815 ctrl->page_size = 1 << page_shift;
816
817 ctrl->ctrl_config = NVME_CC_CSS_NVM;
818 ctrl->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT;
819 ctrl->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE;
820 ctrl->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
821 ctrl->ctrl_config |= NVME_CC_ENABLE;
822
823 ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
824 if (ret)
825 return ret;
826 return nvme_wait_ready(ctrl, cap, true);
827}
828
829int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl)
830{
831 unsigned long timeout = SHUTDOWN_TIMEOUT + jiffies;
832 u32 csts;
833 int ret;
834
835 ctrl->ctrl_config &= ~NVME_CC_SHN_MASK;
836 ctrl->ctrl_config |= NVME_CC_SHN_NORMAL;
837
838 ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
839 if (ret)
840 return ret;
841
842 while ((ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts)) == 0) {
843 if ((csts & NVME_CSTS_SHST_MASK) == NVME_CSTS_SHST_CMPLT)
844 break;
845
846 msleep(100);
847 if (fatal_signal_pending(current))
848 return -EINTR;
849 if (time_after(jiffies, timeout)) {
1b3c47c1 850 dev_err(ctrl->device,
5fd4ce1b
CH
851 "Device shutdown incomplete; abort shutdown\n");
852 return -ENODEV;
853 }
854 }
855
856 return ret;
857}
858
7fd8930f
CH
859/*
860 * Initialize the cached copies of the Identify data and various controller
861 * register in our nvme_ctrl structure. This should be called as soon as
862 * the admin queue is fully up and running.
863 */
864int nvme_init_identify(struct nvme_ctrl *ctrl)
865{
866 struct nvme_id_ctrl *id;
867 u64 cap;
868 int ret, page_shift;
869
f3ca80fc
CH
870 ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs);
871 if (ret) {
1b3c47c1 872 dev_err(ctrl->device, "Reading VS failed (%d)\n", ret);
f3ca80fc
CH
873 return ret;
874 }
875
7fd8930f
CH
876 ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &cap);
877 if (ret) {
1b3c47c1 878 dev_err(ctrl->device, "Reading CAP failed (%d)\n", ret);
7fd8930f
CH
879 return ret;
880 }
881 page_shift = NVME_CAP_MPSMIN(cap) + 12;
882
f3ca80fc
CH
883 if (ctrl->vs >= NVME_VS(1, 1))
884 ctrl->subsystem = NVME_CAP_NSSRC(cap);
885
7fd8930f
CH
886 ret = nvme_identify_ctrl(ctrl, &id);
887 if (ret) {
1b3c47c1 888 dev_err(ctrl->device, "Identify Controller failed (%d)\n", ret);
7fd8930f
CH
889 return -EIO;
890 }
891
892 ctrl->oncs = le16_to_cpup(&id->oncs);
6bf25d16 893 atomic_set(&ctrl->abort_limit, id->acl + 1);
7fd8930f
CH
894 ctrl->vwc = id->vwc;
895 memcpy(ctrl->serial, id->sn, sizeof(id->sn));
896 memcpy(ctrl->model, id->mn, sizeof(id->mn));
897 memcpy(ctrl->firmware_rev, id->fr, sizeof(id->fr));
898 if (id->mdts)
899 ctrl->max_hw_sectors = 1 << (id->mdts + page_shift - 9);
900 else
901 ctrl->max_hw_sectors = UINT_MAX;
902
903 if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) && id->vs[3]) {
904 unsigned int max_hw_sectors;
905
906 ctrl->stripe_size = 1 << (id->vs[3] + page_shift);
907 max_hw_sectors = ctrl->stripe_size >> (page_shift - 9);
908 if (ctrl->max_hw_sectors) {
909 ctrl->max_hw_sectors = min(max_hw_sectors,
910 ctrl->max_hw_sectors);
911 } else {
912 ctrl->max_hw_sectors = max_hw_sectors;
913 }
914 }
915
916 kfree(id);
917 return 0;
918}
919
f3ca80fc 920static int nvme_dev_open(struct inode *inode, struct file *file)
1673f1f0 921{
f3ca80fc
CH
922 struct nvme_ctrl *ctrl;
923 int instance = iminor(inode);
924 int ret = -ENODEV;
1673f1f0 925
f3ca80fc
CH
926 spin_lock(&dev_list_lock);
927 list_for_each_entry(ctrl, &nvme_ctrl_list, node) {
928 if (ctrl->instance != instance)
929 continue;
930
931 if (!ctrl->admin_q) {
932 ret = -EWOULDBLOCK;
933 break;
934 }
935 if (!kref_get_unless_zero(&ctrl->kref))
936 break;
937 file->private_data = ctrl;
938 ret = 0;
939 break;
940 }
941 spin_unlock(&dev_list_lock);
942
943 return ret;
1673f1f0
CH
944}
945
f3ca80fc 946static int nvme_dev_release(struct inode *inode, struct file *file)
1673f1f0 947{
f3ca80fc
CH
948 nvme_put_ctrl(file->private_data);
949 return 0;
950}
951
bfd89471
CH
952static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
953{
954 struct nvme_ns *ns;
955 int ret;
956
957 mutex_lock(&ctrl->namespaces_mutex);
958 if (list_empty(&ctrl->namespaces)) {
959 ret = -ENOTTY;
960 goto out_unlock;
961 }
962
963 ns = list_first_entry(&ctrl->namespaces, struct nvme_ns, list);
964 if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) {
1b3c47c1 965 dev_warn(ctrl->device,
bfd89471
CH
966 "NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n");
967 ret = -EINVAL;
968 goto out_unlock;
969 }
970
1b3c47c1 971 dev_warn(ctrl->device,
bfd89471
CH
972 "using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n");
973 kref_get(&ns->kref);
974 mutex_unlock(&ctrl->namespaces_mutex);
975
976 ret = nvme_user_cmd(ctrl, ns, argp);
977 nvme_put_ns(ns);
978 return ret;
979
980out_unlock:
981 mutex_unlock(&ctrl->namespaces_mutex);
982 return ret;
983}
984
f3ca80fc
CH
985static long nvme_dev_ioctl(struct file *file, unsigned int cmd,
986 unsigned long arg)
987{
988 struct nvme_ctrl *ctrl = file->private_data;
989 void __user *argp = (void __user *)arg;
f3ca80fc
CH
990
991 switch (cmd) {
992 case NVME_IOCTL_ADMIN_CMD:
993 return nvme_user_cmd(ctrl, NULL, argp);
994 case NVME_IOCTL_IO_CMD:
bfd89471 995 return nvme_dev_user_cmd(ctrl, argp);
f3ca80fc 996 case NVME_IOCTL_RESET:
1b3c47c1 997 dev_warn(ctrl->device, "resetting controller\n");
f3ca80fc
CH
998 return ctrl->ops->reset_ctrl(ctrl);
999 case NVME_IOCTL_SUBSYS_RESET:
1000 return nvme_reset_subsystem(ctrl);
1001 default:
1002 return -ENOTTY;
1003 }
1004}
1005
1006static const struct file_operations nvme_dev_fops = {
1007 .owner = THIS_MODULE,
1008 .open = nvme_dev_open,
1009 .release = nvme_dev_release,
1010 .unlocked_ioctl = nvme_dev_ioctl,
1011 .compat_ioctl = nvme_dev_ioctl,
1012};
1013
1014static ssize_t nvme_sysfs_reset(struct device *dev,
1015 struct device_attribute *attr, const char *buf,
1016 size_t count)
1017{
1018 struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
1019 int ret;
1020
1021 ret = ctrl->ops->reset_ctrl(ctrl);
1022 if (ret < 0)
1023 return ret;
1024 return count;
1673f1f0 1025}
f3ca80fc 1026static DEVICE_ATTR(reset_controller, S_IWUSR, NULL, nvme_sysfs_reset);
1673f1f0 1027
2b9b6e86
KB
1028static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
1029 char *buf)
1030{
1031 struct nvme_ns *ns = dev_to_disk(dev)->private_data;
1032 return sprintf(buf, "%pU\n", ns->uuid);
1033}
1034static DEVICE_ATTR(uuid, S_IRUGO, uuid_show, NULL);
1035
1036static ssize_t eui_show(struct device *dev, struct device_attribute *attr,
1037 char *buf)
1038{
1039 struct nvme_ns *ns = dev_to_disk(dev)->private_data;
1040 return sprintf(buf, "%8phd\n", ns->eui);
1041}
1042static DEVICE_ATTR(eui, S_IRUGO, eui_show, NULL);
1043
1044static ssize_t nsid_show(struct device *dev, struct device_attribute *attr,
1045 char *buf)
1046{
1047 struct nvme_ns *ns = dev_to_disk(dev)->private_data;
1048 return sprintf(buf, "%d\n", ns->ns_id);
1049}
1050static DEVICE_ATTR(nsid, S_IRUGO, nsid_show, NULL);
1051
1052static struct attribute *nvme_ns_attrs[] = {
1053 &dev_attr_uuid.attr,
1054 &dev_attr_eui.attr,
1055 &dev_attr_nsid.attr,
1056 NULL,
1057};
1058
1059static umode_t nvme_attrs_are_visible(struct kobject *kobj,
1060 struct attribute *a, int n)
1061{
1062 struct device *dev = container_of(kobj, struct device, kobj);
1063 struct nvme_ns *ns = dev_to_disk(dev)->private_data;
1064
1065 if (a == &dev_attr_uuid.attr) {
1066 if (!memchr_inv(ns->uuid, 0, sizeof(ns->uuid)))
1067 return 0;
1068 }
1069 if (a == &dev_attr_eui.attr) {
1070 if (!memchr_inv(ns->eui, 0, sizeof(ns->eui)))
1071 return 0;
1072 }
1073 return a->mode;
1074}
1075
1076static const struct attribute_group nvme_ns_attr_group = {
1077 .attrs = nvme_ns_attrs,
1078 .is_visible = nvme_attrs_are_visible,
1079};
1080
779ff756
KB
/*
 * Generate a read-only sysfs show routine for a fixed-size char-array
 * field of struct nvme_ctrl.  The identify-data fields are space padded
 * rather than NUL-terminated, so "%.*s" bounds the print to the array
 * size.
 */
#define nvme_show_function(field)						\
static ssize_t  field##_show(struct device *dev,				\
			    struct device_attribute *attr, char *buf)		\
{										\
        struct nvme_ctrl *ctrl = dev_get_drvdata(dev);				\
        return sprintf(buf, "%.*s\n", (int)sizeof(ctrl->field), ctrl->field);	\
}										\
static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL);

/* model / serial / firmware_rev come straight from Identify Controller */
nvme_show_function(model);
nvme_show_function(serial);
nvme_show_function(firmware_rev);
1093
/* Controller-level sysfs attributes, exposed on the nvme%d char device. */
static struct attribute *nvme_dev_attrs[] = {
	&dev_attr_reset_controller.attr,
	&dev_attr_model.attr,
	&dev_attr_serial.attr,
	&dev_attr_firmware_rev.attr,
	NULL
};
1101
/* Group wrapping the controller attributes above. */
static struct attribute_group nvme_dev_attrs_group = {
	.attrs = nvme_dev_attrs,
};
1105
/* NULL-terminated group list handed to device_create_with_groups(). */
static const struct attribute_group *nvme_dev_attr_groups[] = {
	&nvme_dev_attrs_group,
	NULL,
};
1110
5bae7f73
CH
1111static int ns_cmp(void *priv, struct list_head *a, struct list_head *b)
1112{
1113 struct nvme_ns *nsa = container_of(a, struct nvme_ns, list);
1114 struct nvme_ns *nsb = container_of(b, struct nvme_ns, list);
1115
1116 return nsa->ns_id - nsb->ns_id;
1117}
1118
1119static struct nvme_ns *nvme_find_ns(struct nvme_ctrl *ctrl, unsigned nsid)
1120{
1121 struct nvme_ns *ns;
1122
69d3b8ac
CH
1123 lockdep_assert_held(&ctrl->namespaces_mutex);
1124
5bae7f73
CH
1125 list_for_each_entry(ns, &ctrl->namespaces, list) {
1126 if (ns->ns_id == nsid)
1127 return ns;
1128 if (ns->ns_id > nsid)
1129 break;
1130 }
1131 return NULL;
1132}
1133
/*
 * Allocate and register a new namespace for @nsid: create its blk-mq
 * request queue and gendisk, apply the controller's transfer limits,
 * validate it against the device, and (unless it is a LightNVM
 * namespace) expose the disk plus its identification sysfs group.
 * Failures are silent -- the namespace simply does not appear.
 * Caller must hold ctrl->namespaces_mutex.
 */
static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
{
	struct nvme_ns *ns;
	struct gendisk *disk;
	int node = dev_to_node(ctrl->dev);

	lockdep_assert_held(&ctrl->namespaces_mutex);

	ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node);
	if (!ns)
		return;

	ns->queue = blk_mq_init_queue(ctrl->tagset);
	if (IS_ERR(ns->queue))
		goto out_free_ns;
	queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, ns->queue);
	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue);
	ns->queue->queuedata = ns;
	ns->ctrl = ctrl;

	disk = alloc_disk_node(0, node);
	if (!disk)
		goto out_free_queue;

	kref_init(&ns->kref);
	ns->ns_id = nsid;
	ns->disk = disk;
	ns->lba_shift = 9; /* set to a default value for 512 until disk is validated */

	blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
	if (ctrl->max_hw_sectors) {
		blk_queue_max_hw_sectors(ns->queue, ctrl->max_hw_sectors);
		/* worst case: every page-sized chunk lands in its own segment */
		blk_queue_max_segments(ns->queue,
			(ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1);
	}
	if (ctrl->stripe_size)
		blk_queue_chunk_sectors(ns->queue, ctrl->stripe_size >> 9);
	if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
		blk_queue_flush(ns->queue, REQ_FLUSH | REQ_FUA);
	/* segments must not straddle the device page size for PRP lists */
	blk_queue_virt_boundary(ns->queue, ctrl->page_size - 1);

	disk->major = nvme_major;
	disk->first_minor = 0;	/* minors are dynamic with GENHD_FL_EXT_DEVT */
	disk->fops = &nvme_fops;
	disk->private_data = ns;
	disk->queue = ns->queue;
	disk->driverfs_dev = ctrl->device;
	disk->flags = GENHD_FL_EXT_DEVT;
	sprintf(disk->disk_name, "nvme%dn%d", ctrl->instance, nsid);

	if (nvme_revalidate_disk(ns->disk))
		goto out_free_disk;

	list_add_tail(&ns->list, &ctrl->namespaces);
	/* the namespace holds a reference on its controller */
	kref_get(&ctrl->kref);
	/* LightNVM namespaces are exposed through their own subsystem */
	if (ns->type == NVME_NS_LIGHTNVM)
		return;

	add_disk(ns->disk);
	if (sysfs_create_group(&disk_to_dev(ns->disk)->kobj,
					&nvme_ns_attr_group))
		pr_warn("%s: failed to create sysfs group for identification\n",
			ns->disk->disk_name);
	return;
 out_free_disk:
	/*
	 * NOTE(review): alloc_disk_node() returns a refcounted object;
	 * put_disk() looks more appropriate than kfree() here -- confirm.
	 */
	kfree(disk);
 out_free_queue:
	blk_cleanup_queue(ns->queue);
 out_free_ns:
	kfree(ns);
}
1205
/*
 * Tear down a single namespace: detach its gendisk and sysfs group,
 * drain or kill its request queue, unlink it from the controller's
 * list and drop the namespace reference.  Caller must hold
 * ctrl->namespaces_mutex.
 */
static void nvme_ns_remove(struct nvme_ns *ns)
{
	/*
	 * If the controller can no longer process I/O and the queue is
	 * not already dying, it must be killed here so no new requests
	 * are accepted during teardown.
	 */
	bool kill = nvme_io_incapable(ns->ctrl) &&
			!blk_queue_dying(ns->queue);

	lockdep_assert_held(&ns->ctrl->namespaces_mutex);

	if (kill) {
		blk_set_queue_dying(ns->queue);

		/*
		 * The controller was shutdown first if we got here through
		 * device removal. The shutdown may requeue outstanding
		 * requests. These need to be aborted immediately so
		 * del_gendisk doesn't block indefinitely for their completion.
		 */
		blk_mq_abort_requeue_list(ns->queue);
	}
	/* only namespaces that made it to add_disk() have these to undo */
	if (ns->disk->flags & GENHD_FL_UP) {
		if (blk_get_integrity(ns->disk))
			blk_integrity_unregister(ns->disk);
		sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
					&nvme_ns_attr_group);
		del_gendisk(ns->disk);
	}
	if (kill || !blk_queue_dying(ns->queue)) {
		blk_mq_abort_requeue_list(ns->queue);
		blk_cleanup_queue(ns->queue);
	}
	list_del_init(&ns->list);
	nvme_put_ns(ns);	/* may be the final put that frees ns */
}
1238
540c801c
KB
1239static void nvme_validate_ns(struct nvme_ctrl *ctrl, unsigned nsid)
1240{
1241 struct nvme_ns *ns;
1242
1243 ns = nvme_find_ns(ctrl, nsid);
1244 if (ns) {
1245 if (revalidate_disk(ns->disk))
1246 nvme_ns_remove(ns);
1247 } else
1248 nvme_alloc_ns(ctrl, nsid);
1249}
1250
/*
 * Scan namespaces via the Identify "active namespace ID list" (NVMe
 * 1.1+).  Each Identify call returns up to 1024 active IDs greater
 * than @prev.  Every reported ID is validated; IDs the device skipped
 * over (the gap between @prev and the next reported @nsid) are no
 * longer active and their namespaces are removed.
 *
 * Returns 0 on success or a negative errno if an Identify command
 * failed, in which case the caller falls back to a sequential scan.
 */
static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn)
{
	struct nvme_ns *ns;
	__le32 *ns_list;
	unsigned i, j, nsid, prev = 0, num_lists = DIV_ROUND_UP(nn, 1024);
	int ret = 0;

	/* one 4k page holds 1024 little-endian 32-bit namespace IDs */
	ns_list = kzalloc(0x1000, GFP_KERNEL);
	if (!ns_list)
		return -ENOMEM;

	for (i = 0; i < num_lists; i++) {
		ret = nvme_identify_ns_list(ctrl, prev, ns_list);
		if (ret)
			goto out;

		for (j = 0; j < min(nn, 1024U); j++) {
			nsid = le32_to_cpu(ns_list[j]);
			if (!nsid)	/* a zero entry terminates the list */
				goto out;

			nvme_validate_ns(ctrl, nsid);

			/* IDs between prev and nsid are no longer active */
			while (++prev < nsid) {
				ns = nvme_find_ns(ctrl, prev);
				if (ns)
					nvme_ns_remove(ns);
			}
		}
		nn -= j;	/* account for the IDs consumed this page */
	}
 out:
	kfree(ns_list);
	return ret;
}
1286
5bae7f73
CH
1287static void __nvme_scan_namespaces(struct nvme_ctrl *ctrl, unsigned nn)
1288{
1289 struct nvme_ns *ns, *next;
1290 unsigned i;
1291
69d3b8ac
CH
1292 lockdep_assert_held(&ctrl->namespaces_mutex);
1293
540c801c
KB
1294 for (i = 1; i <= nn; i++)
1295 nvme_validate_ns(ctrl, i);
1296
5bae7f73
CH
1297 list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) {
1298 if (ns->ns_id > nn)
1299 nvme_ns_remove(ns);
1300 }
5bae7f73
CH
1301}
1302
1303void nvme_scan_namespaces(struct nvme_ctrl *ctrl)
1304{
1305 struct nvme_id_ctrl *id;
540c801c 1306 unsigned nn;
5bae7f73
CH
1307
1308 if (nvme_identify_ctrl(ctrl, &id))
1309 return;
540c801c 1310
69d3b8ac 1311 mutex_lock(&ctrl->namespaces_mutex);
540c801c
KB
1312 nn = le32_to_cpu(id->nn);
1313 if (ctrl->vs >= NVME_VS(1, 1) &&
1314 !(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)) {
1315 if (!nvme_scan_ns_list(ctrl, nn))
1316 goto done;
1317 }
5bae7f73 1318 __nvme_scan_namespaces(ctrl, le32_to_cpup(&id->nn));
540c801c
KB
1319 done:
1320 list_sort(NULL, &ctrl->namespaces, ns_cmp);
69d3b8ac 1321 mutex_unlock(&ctrl->namespaces_mutex);
5bae7f73
CH
1322 kfree(id);
1323}
1324
1325void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
1326{
1327 struct nvme_ns *ns, *next;
1328
69d3b8ac 1329 mutex_lock(&ctrl->namespaces_mutex);
5bae7f73
CH
1330 list_for_each_entry_safe(ns, next, &ctrl->namespaces, list)
1331 nvme_ns_remove(ns);
69d3b8ac 1332 mutex_unlock(&ctrl->namespaces_mutex);
5bae7f73
CH
1333}
1334
f3ca80fc
CH
/* IDA handing out unique controller instance numbers (nvme0, nvme1, ...). */
static DEFINE_IDA(nvme_instance_ida);

/*
 * Allocate a unique instance number for @ctrl using the classic IDA
 * pre-get/get loop: preload memory outside the lock, retry on -EAGAIN.
 * Returns 0 on success, -ENODEV on allocation failure.
 */
static int nvme_set_instance(struct nvme_ctrl *ctrl)
{
	int instance, error;

	do {
		if (!ida_pre_get(&nvme_instance_ida, GFP_KERNEL))
			return -ENODEV;

		/* dev_list_lock also serializes access to the IDA */
		spin_lock(&dev_list_lock);
		error = ida_get_new(&nvme_instance_ida, &instance);
		spin_unlock(&dev_list_lock);
	} while (error == -EAGAIN);

	if (error)
		return -ENODEV;

	ctrl->instance = instance;
	return 0;
}
1356
/* Return the controller's instance number to the IDA. */
static void nvme_release_instance(struct nvme_ctrl *ctrl)
{
	spin_lock(&dev_list_lock);
	ida_remove(&nvme_instance_ida, ctrl->instance);
	spin_unlock(&dev_list_lock);
}
1363
53029b04
KB
/*
 * Undo the externally visible parts of nvme_init_ctrl(): destroy the
 * character device node and unlink the controller from the global
 * list.  Final teardown happens in nvme_free_ctrl() once the last
 * reference is dropped.
 */
void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
{
	device_destroy(nvme_class, MKDEV(nvme_char_major, ctrl->instance));

	spin_lock(&dev_list_lock);
	list_del(&ctrl->node);
	spin_unlock(&dev_list_lock);
}
1372
/*
 * kref release callback: drop the char device reference, free the
 * instance number, then let the transport free the containing object.
 */
static void nvme_free_ctrl(struct kref *kref)
{
	struct nvme_ctrl *ctrl = container_of(kref, struct nvme_ctrl, kref);

	put_device(ctrl->device);
	nvme_release_instance(ctrl);

	/* transport-specific teardown frees the structure embedding ctrl */
	ctrl->ops->free_ctrl(ctrl);
}
1382
/* Drop a controller reference; the last put triggers nvme_free_ctrl(). */
void nvme_put_ctrl(struct nvme_ctrl *ctrl)
{
	kref_put(&ctrl->kref, nvme_free_ctrl);
}
1387
/*
 * Initialize the NVMe controller structure.  This needs to be called
 * during earliest initialization so that the initialized structures
 * are available during probing.  On success the controller holds an
 * instance number, a /dev/nvme%d character device carrying the
 * controller sysfs groups, and an entry on the global controller list.
 */
int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
		const struct nvme_ctrl_ops *ops, unsigned long quirks)
{
	int ret;

	INIT_LIST_HEAD(&ctrl->namespaces);
	mutex_init(&ctrl->namespaces_mutex);
	kref_init(&ctrl->kref);
	ctrl->dev = dev;
	ctrl->ops = ops;
	ctrl->quirks = quirks;

	ret = nvme_set_instance(ctrl);
	if (ret)
		goto out;

	/* /dev/nvme%d node with the controller attribute groups attached */
	ctrl->device = device_create_with_groups(nvme_class, ctrl->dev,
				MKDEV(nvme_char_major, ctrl->instance),
				ctrl, nvme_dev_attr_groups,
				"nvme%d", ctrl->instance);
	if (IS_ERR(ctrl->device)) {
		ret = PTR_ERR(ctrl->device);
		goto out_release_instance;
	}
	/* extra reference on the device node, dropped in nvme_free_ctrl() */
	get_device(ctrl->device);

	spin_lock(&dev_list_lock);
	list_add_tail(&ctrl->node, &nvme_ctrl_list);
	spin_unlock(&dev_list_lock);

	return 0;
out_release_instance:
	nvme_release_instance(ctrl);
out:
	return ret;
}
1429
/*
 * Quiesce I/O on all of the controller's namespaces: mark each request
 * queue stopped, cancel pending requeue work and stop the hardware
 * queues.  Used around controller reset/shutdown; undone by
 * nvme_start_queues().
 */
void nvme_stop_queues(struct nvme_ctrl *ctrl)
{
	struct nvme_ns *ns;

	mutex_lock(&ctrl->namespaces_mutex);
	list_for_each_entry(ns, &ctrl->namespaces, list) {
		/* QUEUE_FLAG_STOPPED must be set under the queue lock */
		spin_lock_irq(ns->queue->queue_lock);
		queue_flag_set(QUEUE_FLAG_STOPPED, ns->queue);
		spin_unlock_irq(ns->queue->queue_lock);

		blk_mq_cancel_requeue_work(ns->queue);
		blk_mq_stop_hw_queues(ns->queue);
	}
	mutex_unlock(&ctrl->namespaces_mutex);
}
1445
/*
 * Resume I/O on all of the controller's namespaces: clear the stopped
 * flag, restart the hardware queues and kick any requests parked on
 * the requeue list.  Counterpart of nvme_stop_queues().
 */
void nvme_start_queues(struct nvme_ctrl *ctrl)
{
	struct nvme_ns *ns;

	mutex_lock(&ctrl->namespaces_mutex);
	list_for_each_entry(ns, &ctrl->namespaces, list) {
		queue_flag_clear_unlocked(QUEUE_FLAG_STOPPED, ns->queue);
		blk_mq_start_stopped_hw_queues(ns->queue, true);
		blk_mq_kick_requeue_list(ns->queue);
	}
	mutex_unlock(&ctrl->namespaces_mutex);
}
1458
5bae7f73
CH
/*
 * Module init: register the block-device major, the controller char
 * device region and the "nvme" device class, unwinding in reverse
 * order on failure.  A positive return from the register calls means
 * a major number was assigned dynamically and replaces the zero
 * module default.
 */
int __init nvme_core_init(void)
{
	int result;

	result = register_blkdev(nvme_major, "nvme");
	if (result < 0)
		return result;
	else if (result > 0)
		nvme_major = result;

	result = __register_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme",
							&nvme_dev_fops);
	if (result < 0)
		goto unregister_blkdev;
	else if (result > 0)
		nvme_char_major = result;

	nvme_class = class_create(THIS_MODULE, "nvme");
	if (IS_ERR(nvme_class)) {
		result = PTR_ERR(nvme_class);
		goto unregister_chrdev;
	}

	return 0;

 unregister_chrdev:
	__unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
 unregister_blkdev:
	unregister_blkdev(nvme_major, "nvme");
	return result;
}
1490
1491void nvme_core_exit(void)
1492{
1493 unregister_blkdev(nvme_major, "nvme");
f3ca80fc
CH
1494 class_destroy(nvme_class);
1495 __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
5bae7f73 1496}