]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blame - drivers/nvme/host/core.c
nvme: switch controller refcounting to use struct device
[mirror_ubuntu-bionic-kernel.git] / drivers / nvme / host / core.c
CommitLineData
21d34711
CH
1/*
2 * NVM Express device driver
3 * Copyright (c) 2011-2014, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 */
14
15#include <linux/blkdev.h>
16#include <linux/blk-mq.h>
5fd4ce1b 17#include <linux/delay.h>
21d34711 18#include <linux/errno.h>
1673f1f0 19#include <linux/hdreg.h>
21d34711 20#include <linux/kernel.h>
5bae7f73
CH
21#include <linux/module.h>
22#include <linux/list_sort.h>
21d34711
CH
23#include <linux/slab.h>
24#include <linux/types.h>
1673f1f0
CH
25#include <linux/pr.h>
26#include <linux/ptrace.h>
27#include <linux/nvme_ioctl.h>
28#include <linux/t10-pi.h>
c5552fde 29#include <linux/pm_qos.h>
1673f1f0 30#include <asm/unaligned.h>
21d34711
CH
31
32#include "nvme.h"
038bd4cb 33#include "fabrics.h"
21d34711 34
f3ca80fc
CH
/* Number of minors, derived from MINORBITS. */
35#define NVME_MINORS (1U << MINORBITS)
36
8ae4e447
MO
/* Admin command timeout in seconds; module parameter, exported to other modules. */
37unsigned int admin_timeout = 60;
38module_param(admin_timeout, uint, 0644);
ba0ba7d3 39MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands");
576d55d6 40EXPORT_SYMBOL_GPL(admin_timeout);
ba0ba7d3 41
8ae4e447
MO
/* I/O timeout in seconds; exposed as the "io_timeout" module parameter. */
42unsigned int nvme_io_timeout = 30;
43module_param_named(io_timeout, nvme_io_timeout, uint, 0644);
ba0ba7d3 44MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O");
576d55d6 45EXPORT_SYMBOL_GPL(nvme_io_timeout);
ba0ba7d3 46
/* Controller shutdown timeout in seconds. */
b3b1b0b0 47static unsigned char shutdown_timeout = 5;
ba0ba7d3
ML
48module_param(shutdown_timeout, byte, 0644);
49MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown");
50
44e44b29
CH
/* Upper bound on per-command retries, checked in nvme_req_needs_retry(). */
51static u8 nvme_max_retries = 5;
52module_param_named(max_retries, nvme_max_retries, byte, 0644);
f80ec966 53MODULE_PARM_DESC(max_retries, "max number of retries a command may have");
5bae7f73 54
f3ca80fc
CH
/* Character device major; permission 0 means it is not visible in sysfs. */
55static int nvme_char_major;
56module_param(nvme_char_major, int, 0);
57
/* Default maximum power-saving latency, in microseconds, for new devices. */
9947d6a0 58static unsigned long default_ps_max_latency_us = 100000;
c5552fde
AL
59module_param(default_ps_max_latency_us, ulong, 0644);
60MODULE_PARM_DESC(default_ps_max_latency_us,
61 "max power saving latency for new devices; use PM QOS to change per device");
62
c35e30b4
AL
/* When set, allow APST for new devices even if a quirk disables it. */
63static bool force_apst;
64module_param(force_apst, bool, 0644);
65MODULE_PARM_DESC(force_apst, "allow APST for newly enumerated devices even if quirked off");
66
f5d11840
JA
/* Opt-in switch for Streams write directives; see nvme_configure_directives(). */
67static bool streams;
68module_param(streams, bool, 0644);
69MODULE_PARM_DESC(streams, "turn on support for Streams write directives");
70
9a6327d2
SG
/* Workqueue used for controller reset work (see nvme_reset_ctrl()); exported. */
71struct workqueue_struct *nvme_wq;
72EXPORT_SYMBOL_GPL(nvme_wq);
73
/* File-local controller list and the spinlock declared alongside it. */
f3ca80fc 74static LIST_HEAD(nvme_ctrl_list);
9f2482b9 75static DEFINE_SPINLOCK(dev_list_lock);
1673f1f0 76
9843f685
CH
/* ID allocator for controller instance numbers (presumably; usage is outside this view). */
77static DEFINE_IDA(nvme_instance_ida);
78
f3ca80fc
CH
/* Device class for the driver; initialised outside this view. */
79static struct class *nvme_class;
80
b6dccf7f
AD
81static __le32 nvme_get_log_dw10(u8 lid, size_t size)
82{
83 return cpu_to_le32((((size / 4) - 1) << 16) | lid);
84}
85
d86c4d8e
CH
86int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
87{
88 if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
89 return -EBUSY;
90 if (!queue_work(nvme_wq, &ctrl->reset_work))
91 return -EBUSY;
92 return 0;
93}
94EXPORT_SYMBOL_GPL(nvme_reset_ctrl);
95
96static int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
97{
98 int ret;
99
100 ret = nvme_reset_ctrl(ctrl);
101 if (!ret)
102 flush_work(&ctrl->reset_work);
103 return ret;
104}
105
2a842aca 106static blk_status_t nvme_error_status(struct request *req)
27fa9bc5
CH
107{
108 switch (nvme_req(req)->status & 0x7ff) {
109 case NVME_SC_SUCCESS:
2a842aca 110 return BLK_STS_OK;
27fa9bc5 111 case NVME_SC_CAP_EXCEEDED:
2a842aca 112 return BLK_STS_NOSPC;
e02ab023 113 case NVME_SC_ONCS_NOT_SUPPORTED:
2a842aca 114 return BLK_STS_NOTSUPP;
e02ab023
JG
115 case NVME_SC_WRITE_FAULT:
116 case NVME_SC_READ_ERROR:
117 case NVME_SC_UNWRITTEN_BLOCK:
a751da33
CH
118 case NVME_SC_ACCESS_DENIED:
119 case NVME_SC_READ_ONLY:
2a842aca 120 return BLK_STS_MEDIUM;
a751da33
CH
121 case NVME_SC_GUARD_CHECK:
122 case NVME_SC_APPTAG_CHECK:
123 case NVME_SC_REFTAG_CHECK:
124 case NVME_SC_INVALID_PI:
125 return BLK_STS_PROTECTION;
126 case NVME_SC_RESERVATION_CONFLICT:
127 return BLK_STS_NEXUS;
2a842aca
CH
128 default:
129 return BLK_STS_IOERR;
27fa9bc5
CH
130 }
131}
27fa9bc5 132
f6324b1b 133static inline bool nvme_req_needs_retry(struct request *req)
77f02a7a 134{
f6324b1b
CH
135 if (blk_noretry_request(req))
136 return false;
27fa9bc5 137 if (nvme_req(req)->status & NVME_SC_DNR)
f6324b1b 138 return false;
44e44b29 139 if (nvme_req(req)->retries >= nvme_max_retries)
f6324b1b
CH
140 return false;
141 return true;
77f02a7a
CH
142}
143
144void nvme_complete_rq(struct request *req)
145{
27fa9bc5
CH
146 if (unlikely(nvme_req(req)->status && nvme_req_needs_retry(req))) {
147 nvme_req(req)->retries++;
8d7b8faf 148 blk_mq_requeue_request(req, true);
27fa9bc5 149 return;
77f02a7a
CH
150 }
151
27fa9bc5 152 blk_mq_end_request(req, nvme_error_status(req));
77f02a7a
CH
153}
154EXPORT_SYMBOL_GPL(nvme_complete_rq);
155
c55a2fd4
ML
156void nvme_cancel_request(struct request *req, void *data, bool reserved)
157{
158 int status;
159
160 if (!blk_mq_request_started(req))
161 return;
162
163 dev_dbg_ratelimited(((struct nvme_ctrl *) data)->device,
164 "Cancelling I/O %d", req->tag);
165
166 status = NVME_SC_ABORT_REQ;
167 if (blk_queue_dying(req->q))
168 status |= NVME_SC_DNR;
27fa9bc5 169 nvme_req(req)->status = status;
08e0029a 170 blk_mq_complete_request(req);
27fa9bc5 171
c55a2fd4
ML
172}
173EXPORT_SYMBOL_GPL(nvme_cancel_request);
174
bb8d261e
CH
175bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
176 enum nvme_ctrl_state new_state)
177{
f6b6a28e 178 enum nvme_ctrl_state old_state;
0a72bbba 179 unsigned long flags;
bb8d261e
CH
180 bool changed = false;
181
0a72bbba 182 spin_lock_irqsave(&ctrl->lock, flags);
f6b6a28e
GKB
183
184 old_state = ctrl->state;
bb8d261e
CH
185 switch (new_state) {
186 case NVME_CTRL_LIVE:
187 switch (old_state) {
7d2e8008 188 case NVME_CTRL_NEW:
bb8d261e 189 case NVME_CTRL_RESETTING:
def61eca 190 case NVME_CTRL_RECONNECTING:
bb8d261e
CH
191 changed = true;
192 /* FALLTHRU */
193 default:
194 break;
195 }
196 break;
197 case NVME_CTRL_RESETTING:
198 switch (old_state) {
199 case NVME_CTRL_NEW:
def61eca 200 case NVME_CTRL_LIVE:
def61eca
CH
201 changed = true;
202 /* FALLTHRU */
203 default:
204 break;
205 }
206 break;
207 case NVME_CTRL_RECONNECTING:
208 switch (old_state) {
bb8d261e
CH
209 case NVME_CTRL_LIVE:
210 changed = true;
211 /* FALLTHRU */
212 default:
213 break;
214 }
215 break;
216 case NVME_CTRL_DELETING:
217 switch (old_state) {
218 case NVME_CTRL_LIVE:
219 case NVME_CTRL_RESETTING:
def61eca 220 case NVME_CTRL_RECONNECTING:
bb8d261e
CH
221 changed = true;
222 /* FALLTHRU */
223 default:
224 break;
225 }
226 break;
0ff9d4e1
KB
227 case NVME_CTRL_DEAD:
228 switch (old_state) {
229 case NVME_CTRL_DELETING:
230 changed = true;
231 /* FALLTHRU */
232 default:
233 break;
234 }
235 break;
bb8d261e
CH
236 default:
237 break;
238 }
bb8d261e
CH
239
240 if (changed)
241 ctrl->state = new_state;
242
0a72bbba 243 spin_unlock_irqrestore(&ctrl->lock, flags);
f6b6a28e 244
bb8d261e
CH
245 return changed;
246}
247EXPORT_SYMBOL_GPL(nvme_change_ctrl_state);
248
1673f1f0
CH
249static void nvme_free_ns(struct kref *kref)
250{
251 struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref);
252
b0b4e09c
MB
253 if (ns->ndev)
254 nvme_nvm_unregister(ns);
1673f1f0 255
1673f1f0 256 put_disk(ns->disk);
075790eb
KB
257 ida_simple_remove(&ns->ctrl->ns_ida, ns->instance);
258 nvme_put_ctrl(ns->ctrl);
1673f1f0
CH
259 kfree(ns);
260}
261
5bae7f73 262static void nvme_put_ns(struct nvme_ns *ns)
1673f1f0
CH
263{
264 kref_put(&ns->kref, nvme_free_ns);
265}
266
4160982e 267struct request *nvme_alloc_request(struct request_queue *q,
eb71f435 268 struct nvme_command *cmd, unsigned int flags, int qid)
21d34711 269{
aebf526b 270 unsigned op = nvme_is_write(cmd) ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN;
21d34711 271 struct request *req;
21d34711 272
eb71f435 273 if (qid == NVME_QID_ANY) {
aebf526b 274 req = blk_mq_alloc_request(q, op, flags);
eb71f435 275 } else {
aebf526b 276 req = blk_mq_alloc_request_hctx(q, op, flags,
eb71f435
CH
277 qid ? qid - 1 : 0);
278 }
21d34711 279 if (IS_ERR(req))
4160982e 280 return req;
21d34711 281
21d34711 282 req->cmd_flags |= REQ_FAILFAST_DRIVER;
d49187e9 283 nvme_req(req)->cmd = cmd;
21d34711 284
4160982e
CH
285 return req;
286}
576d55d6 287EXPORT_SYMBOL_GPL(nvme_alloc_request);
4160982e 288
f5d11840
JA
289static int nvme_toggle_streams(struct nvme_ctrl *ctrl, bool enable)
290{
291 struct nvme_command c;
292
293 memset(&c, 0, sizeof(c));
294
295 c.directive.opcode = nvme_admin_directive_send;
62346eae 296 c.directive.nsid = cpu_to_le32(NVME_NSID_ALL);
f5d11840
JA
297 c.directive.doper = NVME_DIR_SND_ID_OP_ENABLE;
298 c.directive.dtype = NVME_DIR_IDENTIFY;
299 c.directive.tdtype = NVME_DIR_STREAMS;
300 c.directive.endir = enable ? NVME_DIR_ENDIR : 0;
301
302 return nvme_submit_sync_cmd(ctrl->admin_q, &c, NULL, 0);
303}
304
305static int nvme_disable_streams(struct nvme_ctrl *ctrl)
306{
307 return nvme_toggle_streams(ctrl, false);
308}
309
310static int nvme_enable_streams(struct nvme_ctrl *ctrl)
311{
312 return nvme_toggle_streams(ctrl, true);
313}
314
315static int nvme_get_stream_params(struct nvme_ctrl *ctrl,
316 struct streams_directive_params *s, u32 nsid)
317{
318 struct nvme_command c;
319
320 memset(&c, 0, sizeof(c));
321 memset(s, 0, sizeof(*s));
322
323 c.directive.opcode = nvme_admin_directive_recv;
324 c.directive.nsid = cpu_to_le32(nsid);
a082b426 325 c.directive.numd = cpu_to_le32((sizeof(*s) >> 2) - 1);
f5d11840
JA
326 c.directive.doper = NVME_DIR_RCV_ST_OP_PARAM;
327 c.directive.dtype = NVME_DIR_STREAMS;
328
329 return nvme_submit_sync_cmd(ctrl->admin_q, &c, s, sizeof(*s));
330}
331
332static int nvme_configure_directives(struct nvme_ctrl *ctrl)
333{
334 struct streams_directive_params s;
335 int ret;
336
337 if (!(ctrl->oacs & NVME_CTRL_OACS_DIRECTIVES))
338 return 0;
339 if (!streams)
340 return 0;
341
342 ret = nvme_enable_streams(ctrl);
343 if (ret)
344 return ret;
345
62346eae 346 ret = nvme_get_stream_params(ctrl, &s, NVME_NSID_ALL);
f5d11840
JA
347 if (ret)
348 return ret;
349
350 ctrl->nssa = le16_to_cpu(s.nssa);
351 if (ctrl->nssa < BLK_MAX_WRITE_HINTS - 1) {
352 dev_info(ctrl->device, "too few streams (%u) available\n",
353 ctrl->nssa);
354 nvme_disable_streams(ctrl);
355 return 0;
356 }
357
358 ctrl->nr_streams = min_t(unsigned, ctrl->nssa, BLK_MAX_WRITE_HINTS - 1);
359 dev_info(ctrl->device, "Using %u streams\n", ctrl->nr_streams);
360 return 0;
361}
362
363/*
364 * Check if 'req' has a write hint associated with it. If it does, assign
365 * a valid namespace stream to the write.
366 */
367static void nvme_assign_write_stream(struct nvme_ctrl *ctrl,
368 struct request *req, u16 *control,
369 u32 *dsmgmt)
370{
371 enum rw_hint streamid = req->write_hint;
372
373 if (streamid == WRITE_LIFE_NOT_SET || streamid == WRITE_LIFE_NONE)
374 streamid = 0;
375 else {
376 streamid--;
377 if (WARN_ON_ONCE(streamid > ctrl->nr_streams))
378 return;
379
380 *control |= NVME_RW_DTYPE_STREAMS;
381 *dsmgmt |= streamid << 16;
382 }
383
384 if (streamid < ARRAY_SIZE(req->q->write_hints))
385 req->q->write_hints[streamid] += blk_rq_bytes(req) >> 9;
386}
387
8093f7ca
ML
388static inline void nvme_setup_flush(struct nvme_ns *ns,
389 struct nvme_command *cmnd)
390{
391 memset(cmnd, 0, sizeof(*cmnd));
392 cmnd->common.opcode = nvme_cmd_flush;
393 cmnd->common.nsid = cpu_to_le32(ns->ns_id);
394}
395
fc17b653 396static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
8093f7ca
ML
397 struct nvme_command *cmnd)
398{
b35ba01e 399 unsigned short segments = blk_rq_nr_discard_segments(req), n = 0;
8093f7ca 400 struct nvme_dsm_range *range;
b35ba01e 401 struct bio *bio;
8093f7ca 402
b35ba01e 403 range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC);
8093f7ca 404 if (!range)
fc17b653 405 return BLK_STS_RESOURCE;
8093f7ca 406
b35ba01e
CH
407 __rq_for_each_bio(bio, req) {
408 u64 slba = nvme_block_nr(ns, bio->bi_iter.bi_sector);
409 u32 nlb = bio->bi_iter.bi_size >> ns->lba_shift;
410
411 range[n].cattr = cpu_to_le32(0);
412 range[n].nlb = cpu_to_le32(nlb);
413 range[n].slba = cpu_to_le64(slba);
414 n++;
415 }
416
417 if (WARN_ON_ONCE(n != segments)) {
418 kfree(range);
fc17b653 419 return BLK_STS_IOERR;
b35ba01e 420 }
8093f7ca
ML
421
422 memset(cmnd, 0, sizeof(*cmnd));
423 cmnd->dsm.opcode = nvme_cmd_dsm;
424 cmnd->dsm.nsid = cpu_to_le32(ns->ns_id);
f1dd03a8 425 cmnd->dsm.nr = cpu_to_le32(segments - 1);
8093f7ca
ML
426 cmnd->dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
427
f9d03f96
CH
428 req->special_vec.bv_page = virt_to_page(range);
429 req->special_vec.bv_offset = offset_in_page(range);
b35ba01e 430 req->special_vec.bv_len = sizeof(*range) * segments;
f9d03f96 431 req->rq_flags |= RQF_SPECIAL_PAYLOAD;
8093f7ca 432
fc17b653 433 return BLK_STS_OK;
8093f7ca 434}
8093f7ca 435
ebe6d874
CH
436static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
437 struct request *req, struct nvme_command *cmnd)
8093f7ca 438{
f5d11840 439 struct nvme_ctrl *ctrl = ns->ctrl;
8093f7ca
ML
440 u16 control = 0;
441 u32 dsmgmt = 0;
442
ebe6d874
CH
443 /*
444 * If formated with metadata, require the block layer provide a buffer
445 * unless this namespace is formated such that the metadata can be
446 * stripped/generated by the controller with PRACT=1.
447 */
8fa61121
SG
448 if (ns && ns->ms &&
449 (!ns->pi_type || ns->ms != sizeof(struct t10_pi_tuple)) &&
ebe6d874
CH
450 !blk_integrity_rq(req) && !blk_rq_is_passthrough(req))
451 return BLK_STS_NOTSUPP;
452
8093f7ca
ML
453 if (req->cmd_flags & REQ_FUA)
454 control |= NVME_RW_FUA;
455 if (req->cmd_flags & (REQ_FAILFAST_DEV | REQ_RAHEAD))
456 control |= NVME_RW_LR;
457
458 if (req->cmd_flags & REQ_RAHEAD)
459 dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH;
460
461 memset(cmnd, 0, sizeof(*cmnd));
462 cmnd->rw.opcode = (rq_data_dir(req) ? nvme_cmd_write : nvme_cmd_read);
8093f7ca
ML
463 cmnd->rw.nsid = cpu_to_le32(ns->ns_id);
464 cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
465 cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
466
f5d11840
JA
467 if (req_op(req) == REQ_OP_WRITE && ctrl->nr_streams)
468 nvme_assign_write_stream(ctrl, req, &control, &dsmgmt);
469
8093f7ca
ML
470 if (ns->ms) {
471 switch (ns->pi_type) {
472 case NVME_NS_DPS_PI_TYPE3:
473 control |= NVME_RW_PRINFO_PRCHK_GUARD;
474 break;
475 case NVME_NS_DPS_PI_TYPE1:
476 case NVME_NS_DPS_PI_TYPE2:
477 control |= NVME_RW_PRINFO_PRCHK_GUARD |
478 NVME_RW_PRINFO_PRCHK_REF;
479 cmnd->rw.reftag = cpu_to_le32(
480 nvme_block_nr(ns, blk_rq_pos(req)));
481 break;
482 }
483 if (!blk_integrity_rq(req))
484 control |= NVME_RW_PRINFO_PRACT;
485 }
486
487 cmnd->rw.control = cpu_to_le16(control);
488 cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt);
ebe6d874 489 return 0;
8093f7ca
ML
490}
491
fc17b653 492blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
8093f7ca
ML
493 struct nvme_command *cmd)
494{
fc17b653 495 blk_status_t ret = BLK_STS_OK;
8093f7ca 496
987f699a 497 if (!(req->rq_flags & RQF_DONTPREP)) {
44e44b29 498 nvme_req(req)->retries = 0;
27fa9bc5 499 nvme_req(req)->flags = 0;
987f699a
CH
500 req->rq_flags |= RQF_DONTPREP;
501 }
502
aebf526b
CH
503 switch (req_op(req)) {
504 case REQ_OP_DRV_IN:
505 case REQ_OP_DRV_OUT:
d49187e9 506 memcpy(cmd, nvme_req(req)->cmd, sizeof(*cmd));
aebf526b
CH
507 break;
508 case REQ_OP_FLUSH:
8093f7ca 509 nvme_setup_flush(ns, cmd);
aebf526b 510 break;
e850fd16
CH
511 case REQ_OP_WRITE_ZEROES:
512 /* currently only aliased to deallocate for a few ctrls: */
aebf526b 513 case REQ_OP_DISCARD:
8093f7ca 514 ret = nvme_setup_discard(ns, req, cmd);
aebf526b
CH
515 break;
516 case REQ_OP_READ:
517 case REQ_OP_WRITE:
ebe6d874 518 ret = nvme_setup_rw(ns, req, cmd);
aebf526b
CH
519 break;
520 default:
521 WARN_ON_ONCE(1);
fc17b653 522 return BLK_STS_IOERR;
aebf526b 523 }
8093f7ca 524
721b3917 525 cmd->common.command_id = req->tag;
8093f7ca
ML
526 return ret;
527}
528EXPORT_SYMBOL_GPL(nvme_setup_cmd);
529
4160982e
CH
530/*
531 * Returns 0 on success. If the result is negative, it's a Linux error code;
532 * if the result is positive, it's an NVM Express status code
533 */
534int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
d49187e9 535 union nvme_result *result, void *buffer, unsigned bufflen,
eb71f435 536 unsigned timeout, int qid, int at_head, int flags)
4160982e
CH
537{
538 struct request *req;
539 int ret;
540
eb71f435 541 req = nvme_alloc_request(q, cmd, flags, qid);
4160982e
CH
542 if (IS_ERR(req))
543 return PTR_ERR(req);
544
545 req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
546
21d34711
CH
547 if (buffer && bufflen) {
548 ret = blk_rq_map_kern(q, req, buffer, bufflen, GFP_KERNEL);
549 if (ret)
550 goto out;
4160982e
CH
551 }
552
eb71f435 553 blk_execute_rq(req->q, NULL, req, at_head);
d49187e9
CH
554 if (result)
555 *result = nvme_req(req)->result;
27fa9bc5
CH
556 if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
557 ret = -EINTR;
558 else
559 ret = nvme_req(req)->status;
4160982e
CH
560 out:
561 blk_mq_free_request(req);
562 return ret;
563}
eb71f435 564EXPORT_SYMBOL_GPL(__nvme_submit_sync_cmd);
4160982e
CH
565
566int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
567 void *buffer, unsigned bufflen)
568{
eb71f435
CH
569 return __nvme_submit_sync_cmd(q, cmd, NULL, buffer, bufflen, 0,
570 NVME_QID_ANY, 0, 0);
4160982e 571}
576d55d6 572EXPORT_SYMBOL_GPL(nvme_submit_sync_cmd);
4160982e 573
1cad6562
CH
574static void *nvme_add_user_metadata(struct bio *bio, void __user *ubuf,
575 unsigned len, u32 seed, bool write)
576{
577 struct bio_integrity_payload *bip;
578 int ret = -ENOMEM;
579 void *buf;
580
581 buf = kmalloc(len, GFP_KERNEL);
582 if (!buf)
583 goto out;
584
585 ret = -EFAULT;
586 if (write && copy_from_user(buf, ubuf, len))
587 goto out_free_meta;
588
589 bip = bio_integrity_alloc(bio, GFP_KERNEL, 1);
590 if (IS_ERR(bip)) {
591 ret = PTR_ERR(bip);
592 goto out_free_meta;
593 }
594
595 bip->bip_iter.bi_size = len;
596 bip->bip_iter.bi_sector = seed;
597 ret = bio_integrity_add_page(bio, virt_to_page(buf), len,
598 offset_in_page(buf));
599 if (ret == len)
600 return buf;
601 ret = -ENOMEM;
602out_free_meta:
603 kfree(buf);
604out:
605 return ERR_PTR(ret);
606}
607
63263d60 608static int nvme_submit_user_cmd(struct request_queue *q,
485783ca
KB
609 struct nvme_command *cmd, void __user *ubuffer,
610 unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
611 u32 meta_seed, u32 *result, unsigned timeout)
4160982e 612{
7a5abb4b 613 bool write = nvme_is_write(cmd);
0b7f1f26
KB
614 struct nvme_ns *ns = q->queuedata;
615 struct gendisk *disk = ns ? ns->disk : NULL;
4160982e 616 struct request *req;
0b7f1f26
KB
617 struct bio *bio = NULL;
618 void *meta = NULL;
4160982e
CH
619 int ret;
620
eb71f435 621 req = nvme_alloc_request(q, cmd, 0, NVME_QID_ANY);
4160982e
CH
622 if (IS_ERR(req))
623 return PTR_ERR(req);
624
625 req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
626
627 if (ubuffer && bufflen) {
21d34711
CH
628 ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen,
629 GFP_KERNEL);
630 if (ret)
631 goto out;
632 bio = req->bio;
74d46992 633 bio->bi_disk = disk;
1cad6562
CH
634 if (disk && meta_buffer && meta_len) {
635 meta = nvme_add_user_metadata(bio, meta_buffer, meta_len,
636 meta_seed, write);
637 if (IS_ERR(meta)) {
638 ret = PTR_ERR(meta);
0b7f1f26
KB
639 goto out_unmap;
640 }
0b7f1f26
KB
641 }
642 }
1cad6562 643
0b7f1f26 644 blk_execute_rq(req->q, disk, req, 0);
27fa9bc5
CH
645 if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
646 ret = -EINTR;
647 else
648 ret = nvme_req(req)->status;
21d34711 649 if (result)
d49187e9 650 *result = le32_to_cpu(nvme_req(req)->result.u32);
0b7f1f26
KB
651 if (meta && !ret && !write) {
652 if (copy_to_user(meta_buffer, meta, meta_len))
653 ret = -EFAULT;
654 }
0b7f1f26
KB
655 kfree(meta);
656 out_unmap:
74d46992 657 if (bio)
0b7f1f26 658 blk_rq_unmap_user(bio);
21d34711
CH
659 out:
660 blk_mq_free_request(req);
661 return ret;
662}
663
2a842aca 664static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status)
038bd4cb
SG
665{
666 struct nvme_ctrl *ctrl = rq->end_io_data;
667
668 blk_mq_free_request(rq);
669
2a842aca 670 if (status) {
038bd4cb 671 dev_err(ctrl->device,
2a842aca
CH
672 "failed nvme_keep_alive_end_io error=%d\n",
673 status);
038bd4cb
SG
674 return;
675 }
676
677 schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
678}
679
680static int nvme_keep_alive(struct nvme_ctrl *ctrl)
681{
682 struct nvme_command c;
683 struct request *rq;
684
685 memset(&c, 0, sizeof(c));
686 c.common.opcode = nvme_admin_keep_alive;
687
688 rq = nvme_alloc_request(ctrl->admin_q, &c, BLK_MQ_REQ_RESERVED,
689 NVME_QID_ANY);
690 if (IS_ERR(rq))
691 return PTR_ERR(rq);
692
693 rq->timeout = ctrl->kato * HZ;
694 rq->end_io_data = ctrl;
695
696 blk_execute_rq_nowait(rq->q, NULL, rq, 0, nvme_keep_alive_end_io);
697
698 return 0;
699}
700
701static void nvme_keep_alive_work(struct work_struct *work)
702{
703 struct nvme_ctrl *ctrl = container_of(to_delayed_work(work),
704 struct nvme_ctrl, ka_work);
705
706 if (nvme_keep_alive(ctrl)) {
707 /* allocation failure, reset the controller */
708 dev_err(ctrl->device, "keep-alive failed\n");
39bdc590 709 nvme_reset_ctrl(ctrl);
038bd4cb
SG
710 return;
711 }
712}
713
714void nvme_start_keep_alive(struct nvme_ctrl *ctrl)
715{
716 if (unlikely(ctrl->kato == 0))
717 return;
718
719 INIT_DELAYED_WORK(&ctrl->ka_work, nvme_keep_alive_work);
720 schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
721}
722EXPORT_SYMBOL_GPL(nvme_start_keep_alive);
723
724void nvme_stop_keep_alive(struct nvme_ctrl *ctrl)
725{
726 if (unlikely(ctrl->kato == 0))
727 return;
728
729 cancel_delayed_work_sync(&ctrl->ka_work);
730}
731EXPORT_SYMBOL_GPL(nvme_stop_keep_alive);
732
3f7f25a9 733static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
21d34711
CH
734{
735 struct nvme_command c = { };
736 int error;
737
738 /* gcc-4.4.4 (at least) has issues with initializers and anon unions */
739 c.identify.opcode = nvme_admin_identify;
986994a2 740 c.identify.cns = NVME_ID_CNS_CTRL;
21d34711
CH
741
742 *id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL);
743 if (!*id)
744 return -ENOMEM;
745
746 error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
747 sizeof(struct nvme_id_ctrl));
748 if (error)
749 kfree(*id);
750 return error;
751}
752
cdbff4f2
CH
753static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
754 u8 *eui64, u8 *nguid, uuid_t *uuid)
3b22ba26
JT
755{
756 struct nvme_command c = { };
757 int status;
758 void *data;
759 int pos;
760 int len;
761
762 c.identify.opcode = nvme_admin_identify;
763 c.identify.nsid = cpu_to_le32(nsid);
764 c.identify.cns = NVME_ID_CNS_NS_DESC_LIST;
765
766 data = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL);
767 if (!data)
768 return -ENOMEM;
769
cdbff4f2 770 status = nvme_submit_sync_cmd(ctrl->admin_q, &c, data,
3b22ba26
JT
771 NVME_IDENTIFY_DATA_SIZE);
772 if (status)
773 goto free_data;
774
775 for (pos = 0; pos < NVME_IDENTIFY_DATA_SIZE; pos += len) {
776 struct nvme_ns_id_desc *cur = data + pos;
777
778 if (cur->nidl == 0)
779 break;
780
781 switch (cur->nidt) {
782 case NVME_NIDT_EUI64:
783 if (cur->nidl != NVME_NIDT_EUI64_LEN) {
cdbff4f2 784 dev_warn(ctrl->device,
3b22ba26
JT
785 "ctrl returned bogus length: %d for NVME_NIDT_EUI64\n",
786 cur->nidl);
787 goto free_data;
788 }
789 len = NVME_NIDT_EUI64_LEN;
cdbff4f2 790 memcpy(eui64, data + pos + sizeof(*cur), len);
3b22ba26
JT
791 break;
792 case NVME_NIDT_NGUID:
793 if (cur->nidl != NVME_NIDT_NGUID_LEN) {
cdbff4f2 794 dev_warn(ctrl->device,
3b22ba26
JT
795 "ctrl returned bogus length: %d for NVME_NIDT_NGUID\n",
796 cur->nidl);
797 goto free_data;
798 }
799 len = NVME_NIDT_NGUID_LEN;
cdbff4f2 800 memcpy(nguid, data + pos + sizeof(*cur), len);
3b22ba26
JT
801 break;
802 case NVME_NIDT_UUID:
803 if (cur->nidl != NVME_NIDT_UUID_LEN) {
cdbff4f2 804 dev_warn(ctrl->device,
3b22ba26
JT
805 "ctrl returned bogus length: %d for NVME_NIDT_UUID\n",
806 cur->nidl);
807 goto free_data;
808 }
809 len = NVME_NIDT_UUID_LEN;
cdbff4f2 810 uuid_copy(uuid, data + pos + sizeof(*cur));
3b22ba26
JT
811 break;
812 default:
813 /* Skip unnkown types */
814 len = cur->nidl;
815 break;
816 }
817
818 len += sizeof(*cur);
819 }
820free_data:
821 kfree(data);
822 return status;
823}
824
540c801c
KB
825static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *ns_list)
826{
827 struct nvme_command c = { };
828
829 c.identify.opcode = nvme_admin_identify;
986994a2 830 c.identify.cns = NVME_ID_CNS_NS_ACTIVE_LIST;
540c801c
KB
831 c.identify.nsid = cpu_to_le32(nsid);
832 return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000);
833}
834
cdbff4f2
CH
835static struct nvme_id_ns *nvme_identify_ns(struct nvme_ctrl *ctrl,
836 unsigned nsid)
21d34711 837{
cdbff4f2 838 struct nvme_id_ns *id;
21d34711
CH
839 struct nvme_command c = { };
840 int error;
841
842 /* gcc-4.4.4 (at least) has issues with initializers and anon unions */
778f067c
MG
843 c.identify.opcode = nvme_admin_identify;
844 c.identify.nsid = cpu_to_le32(nsid);
986994a2 845 c.identify.cns = NVME_ID_CNS_NS;
21d34711 846
cdbff4f2
CH
847 id = kmalloc(sizeof(*id), GFP_KERNEL);
848 if (!id)
849 return NULL;
21d34711 850
cdbff4f2
CH
851 error = nvme_submit_sync_cmd(ctrl->admin_q, &c, id, sizeof(*id));
852 if (error) {
853 dev_warn(ctrl->device, "Identify namespace failed\n");
854 kfree(id);
855 return NULL;
856 }
857
858 return id;
21d34711
CH
859}
860
3f7f25a9 861static int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
1a6fe74d 862 void *buffer, size_t buflen, u32 *result)
21d34711
CH
863{
864 struct nvme_command c;
d49187e9 865 union nvme_result res;
1cb3cce5 866 int ret;
21d34711
CH
867
868 memset(&c, 0, sizeof(c));
869 c.features.opcode = nvme_admin_set_features;
21d34711
CH
870 c.features.fid = cpu_to_le32(fid);
871 c.features.dword11 = cpu_to_le32(dword11);
872
d49187e9 873 ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &res,
1a6fe74d 874 buffer, buflen, 0, NVME_QID_ANY, 0, 0);
9b47f77a 875 if (ret >= 0 && result)
d49187e9 876 *result = le32_to_cpu(res.u32);
1cb3cce5 877 return ret;
21d34711
CH
878}
879
9a0be7ab
CH
880int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
881{
882 u32 q_count = (*count - 1) | ((*count - 1) << 16);
883 u32 result;
884 int status, nr_io_queues;
885
1a6fe74d 886 status = nvme_set_features(ctrl, NVME_FEAT_NUM_QUEUES, q_count, NULL, 0,
9a0be7ab 887 &result);
f5fa90dc 888 if (status < 0)
9a0be7ab
CH
889 return status;
890
f5fa90dc
CH
891 /*
892 * Degraded controllers might return an error when setting the queue
893 * count. We still want to be able to bring them online and offer
894 * access to the admin queue, as that might be only way to fix them up.
895 */
896 if (status > 0) {
f0425db0 897 dev_err(ctrl->device, "Could not set queue count (%d)\n", status);
f5fa90dc
CH
898 *count = 0;
899 } else {
900 nr_io_queues = min(result & 0xffff, result >> 16) + 1;
901 *count = min(*count, nr_io_queues);
902 }
903
9a0be7ab
CH
904 return 0;
905}
576d55d6 906EXPORT_SYMBOL_GPL(nvme_set_queue_count);
9a0be7ab 907
1673f1f0
CH
908static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
909{
910 struct nvme_user_io io;
911 struct nvme_command c;
912 unsigned length, meta_len;
913 void __user *metadata;
914
915 if (copy_from_user(&io, uio, sizeof(io)))
916 return -EFAULT;
63088ec7
KB
917 if (io.flags)
918 return -EINVAL;
1673f1f0
CH
919
920 switch (io.opcode) {
921 case nvme_cmd_write:
922 case nvme_cmd_read:
923 case nvme_cmd_compare:
924 break;
925 default:
926 return -EINVAL;
927 }
928
929 length = (io.nblocks + 1) << ns->lba_shift;
930 meta_len = (io.nblocks + 1) * ns->ms;
931 metadata = (void __user *)(uintptr_t)io.metadata;
932
933 if (ns->ext) {
934 length += meta_len;
935 meta_len = 0;
936 } else if (meta_len) {
937 if ((io.metadata & 3) || !io.metadata)
938 return -EINVAL;
939 }
940
941 memset(&c, 0, sizeof(c));
942 c.rw.opcode = io.opcode;
943 c.rw.flags = io.flags;
944 c.rw.nsid = cpu_to_le32(ns->ns_id);
945 c.rw.slba = cpu_to_le64(io.slba);
946 c.rw.length = cpu_to_le16(io.nblocks);
947 c.rw.control = cpu_to_le16(io.control);
948 c.rw.dsmgmt = cpu_to_le32(io.dsmgmt);
949 c.rw.reftag = cpu_to_le32(io.reftag);
950 c.rw.apptag = cpu_to_le16(io.apptag);
951 c.rw.appmask = cpu_to_le16(io.appmask);
952
63263d60 953 return nvme_submit_user_cmd(ns->queue, &c,
1673f1f0
CH
954 (void __user *)(uintptr_t)io.addr, length,
955 metadata, meta_len, io.slba, NULL, 0);
956}
957
f3ca80fc 958static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
1673f1f0
CH
959 struct nvme_passthru_cmd __user *ucmd)
960{
961 struct nvme_passthru_cmd cmd;
962 struct nvme_command c;
963 unsigned timeout = 0;
964 int status;
965
966 if (!capable(CAP_SYS_ADMIN))
967 return -EACCES;
968 if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
969 return -EFAULT;
63088ec7
KB
970 if (cmd.flags)
971 return -EINVAL;
1673f1f0
CH
972
973 memset(&c, 0, sizeof(c));
974 c.common.opcode = cmd.opcode;
975 c.common.flags = cmd.flags;
976 c.common.nsid = cpu_to_le32(cmd.nsid);
977 c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
978 c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
979 c.common.cdw10[0] = cpu_to_le32(cmd.cdw10);
980 c.common.cdw10[1] = cpu_to_le32(cmd.cdw11);
981 c.common.cdw10[2] = cpu_to_le32(cmd.cdw12);
982 c.common.cdw10[3] = cpu_to_le32(cmd.cdw13);
983 c.common.cdw10[4] = cpu_to_le32(cmd.cdw14);
984 c.common.cdw10[5] = cpu_to_le32(cmd.cdw15);
985
986 if (cmd.timeout_ms)
987 timeout = msecs_to_jiffies(cmd.timeout_ms);
988
989 status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
d1ea7be5 990 (void __user *)(uintptr_t)cmd.addr, cmd.data_len,
63263d60
KB
991 (void __user *)(uintptr_t)cmd.metadata, cmd.metadata,
992 0, &cmd.result, timeout);
1673f1f0
CH
993 if (status >= 0) {
994 if (put_user(cmd.result, &ucmd->result))
995 return -EFAULT;
996 }
997
998 return status;
999}
1000
1001static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
1002 unsigned int cmd, unsigned long arg)
1003{
1004 struct nvme_ns *ns = bdev->bd_disk->private_data;
1005
1006 switch (cmd) {
1007 case NVME_IOCTL_ID:
1008 force_successful_syscall_return();
1009 return ns->ns_id;
1010 case NVME_IOCTL_ADMIN_CMD:
1011 return nvme_user_cmd(ns->ctrl, NULL, (void __user *)arg);
1012 case NVME_IOCTL_IO_CMD:
1013 return nvme_user_cmd(ns->ctrl, ns, (void __user *)arg);
1014 case NVME_IOCTL_SUBMIT_IO:
1015 return nvme_submit_io(ns, (void __user *)arg);
1673f1f0 1016 default:
84d4add7
MB
1017#ifdef CONFIG_NVM
1018 if (ns->ndev)
1019 return nvme_nvm_ioctl(ns, cmd, arg);
1020#endif
a98e58e5 1021 if (is_sed_ioctl(cmd))
4f1244c8 1022 return sed_ioctl(ns->ctrl->opal_dev, cmd,
e225c20e 1023 (void __user *) arg);
1673f1f0
CH
1024 return -ENOTTY;
1025 }
1026}
1027
1673f1f0
CH
1028static int nvme_open(struct block_device *bdev, fmode_t mode)
1029{
c6424a90
CH
1030 struct nvme_ns *ns = bdev->bd_disk->private_data;
1031
1032 if (!kref_get_unless_zero(&ns->kref))
1033 return -ENXIO;
1034 if (!try_module_get(ns->ctrl->ops->module)) {
1035 kref_put(&ns->kref, nvme_free_ns);
1036 return -ENXIO;
1037 }
1038
1039 return 0;
1673f1f0
CH
1040}
1041
1042static void nvme_release(struct gendisk *disk, fmode_t mode)
1043{
e439bb12
SG
1044 struct nvme_ns *ns = disk->private_data;
1045
1046 module_put(ns->ctrl->ops->module);
1047 nvme_put_ns(ns);
1673f1f0
CH
1048}
1049
1050static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1051{
1052 /* some standard values */
1053 geo->heads = 1 << 6;
1054 geo->sectors = 1 << 5;
1055 geo->cylinders = get_capacity(bdev->bd_disk) >> 11;
1056 return 0;
1057}
1058
1059#ifdef CONFIG_BLK_DEV_INTEGRITY
c81bfba9
CH
1060static void nvme_prep_integrity(struct gendisk *disk, struct nvme_id_ns *id,
1061 u16 bs)
1062{
1063 struct nvme_ns *ns = disk->private_data;
1064 u16 old_ms = ns->ms;
1065 u8 pi_type = 0;
1066
1067 ns->ms = le16_to_cpu(id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ms);
1068 ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);
1069
1070 /* PI implementation requires metadata equal t10 pi tuple size */
1071 if (ns->ms == sizeof(struct t10_pi_tuple))
1072 pi_type = id->dps & NVME_NS_DPS_PI_MASK;
1073
1074 if (blk_get_integrity(disk) &&
1075 (ns->pi_type != pi_type || ns->ms != old_ms ||
1076 bs != queue_logical_block_size(disk->queue) ||
1077 (ns->ms && ns->ext)))
1078 blk_integrity_unregister(disk);
1079
1080 ns->pi_type = pi_type;
1081}
1082
1673f1f0
CH
1083static void nvme_init_integrity(struct nvme_ns *ns)
1084{
1085 struct blk_integrity integrity;
1086
fa9a89fc 1087 memset(&integrity, 0, sizeof(integrity));
1673f1f0
CH
1088 switch (ns->pi_type) {
1089 case NVME_NS_DPS_PI_TYPE3:
1090 integrity.profile = &t10_pi_type3_crc;
ba36c21b
NB
1091 integrity.tag_size = sizeof(u16) + sizeof(u32);
1092 integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE;
1673f1f0
CH
1093 break;
1094 case NVME_NS_DPS_PI_TYPE1:
1095 case NVME_NS_DPS_PI_TYPE2:
1096 integrity.profile = &t10_pi_type1_crc;
ba36c21b
NB
1097 integrity.tag_size = sizeof(u16);
1098 integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE;
1673f1f0
CH
1099 break;
1100 default:
1101 integrity.profile = NULL;
1102 break;
1103 }
1104 integrity.tuple_size = ns->ms;
1105 blk_integrity_register(ns->disk, &integrity);
1106 blk_queue_max_integrity_segments(ns->queue, 1);
1107}
1108#else
c81bfba9
CH
1109static void nvme_prep_integrity(struct gendisk *disk, struct nvme_id_ns *id,
1110 u16 bs)
1111{
1112}
1673f1f0
CH
/* No-op variant used when CONFIG_BLK_DEV_INTEGRITY is not set. */
static void nvme_init_integrity(struct nvme_ns *ns)
{
}
1116#endif /* CONFIG_BLK_DEV_INTEGRITY */
1117
6b8190d6
SB
1118static void nvme_set_chunk_size(struct nvme_ns *ns)
1119{
1120 u32 chunk_size = (((u32)ns->noiob) << (ns->lba_shift - 9));
1121 blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(chunk_size));
1122}
1123
1673f1f0
CH
1124static void nvme_config_discard(struct nvme_ns *ns)
1125{
08095e70 1126 struct nvme_ctrl *ctrl = ns->ctrl;
1673f1f0 1127 u32 logical_block_size = queue_logical_block_size(ns->queue);
08095e70 1128
b35ba01e
CH
1129 BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
1130 NVME_DSM_MAX_RANGES);
1131
f5d11840
JA
1132 if (ctrl->nr_streams && ns->sws && ns->sgs) {
1133 unsigned int sz = logical_block_size * ns->sws * ns->sgs;
1134
1135 ns->queue->limits.discard_alignment = sz;
1136 ns->queue->limits.discard_granularity = sz;
1137 } else {
1138 ns->queue->limits.discard_alignment = logical_block_size;
1139 ns->queue->limits.discard_granularity = logical_block_size;
1140 }
bd0fc288 1141 blk_queue_max_discard_sectors(ns->queue, UINT_MAX);
b35ba01e 1142 blk_queue_max_discard_segments(ns->queue, NVME_DSM_MAX_RANGES);
1673f1f0 1143 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue);
e850fd16
CH
1144
1145 if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES)
1146 blk_queue_max_write_zeroes_sectors(ns->queue, UINT_MAX);
1673f1f0
CH
1147}
1148
cdbff4f2
CH
1149static void nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid,
1150 struct nvme_id_ns *id, u8 *eui64, u8 *nguid, uuid_t *uuid)
1673f1f0 1151{
cdbff4f2
CH
1152 if (ctrl->vs >= NVME_VS(1, 1, 0))
1153 memcpy(eui64, id->eui64, sizeof(id->eui64));
1154 if (ctrl->vs >= NVME_VS(1, 2, 0))
1155 memcpy(nguid, id->nguid, sizeof(id->nguid));
1156 if (ctrl->vs >= NVME_VS(1, 3, 0)) {
3b22ba26
JT
1157 /* Don't treat error as fatal we potentially
1158 * already have a NGUID or EUI-64
1159 */
cdbff4f2
CH
1160 if (nvme_identify_ns_descs(ctrl, nsid, eui64, nguid, uuid))
1161 dev_warn(ctrl->device,
3b22ba26
JT
1162 "%s: Identify Descriptors failed\n", __func__);
1163 }
ac81bfa9
MB
1164}
1165
1166static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
1167{
1168 struct nvme_ns *ns = disk->private_data;
f5d11840 1169 struct nvme_ctrl *ctrl = ns->ctrl;
c81bfba9 1170 u16 bs;
1673f1f0
CH
1171
1172 /*
1173 * If identify namespace failed, use default 512 byte block size so
1174 * block layer can use before failing read/write for 0 capacity.
1175 */
c81bfba9 1176 ns->lba_shift = id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ds;
1673f1f0
CH
1177 if (ns->lba_shift == 0)
1178 ns->lba_shift = 9;
1179 bs = 1 << ns->lba_shift;
6b8190d6 1180 ns->noiob = le16_to_cpu(id->noiob);
1673f1f0
CH
1181
1182 blk_mq_freeze_queue(disk->queue);
1673f1f0 1183
f5d11840 1184 if (ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)
c81bfba9 1185 nvme_prep_integrity(disk, id, bs);
1673f1f0 1186 blk_queue_logical_block_size(ns->queue, bs);
6b8190d6
SB
1187 if (ns->noiob)
1188 nvme_set_chunk_size(ns);
4b9d5b15 1189 if (ns->ms && !blk_get_integrity(disk) && !ns->ext)
1673f1f0 1190 nvme_init_integrity(ns);
1673f1f0
CH
1191 if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk))
1192 set_capacity(disk, 0);
1193 else
1194 set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
1195
f5d11840 1196 if (ctrl->oncs & NVME_CTRL_ONCS_DSM)
1673f1f0
CH
1197 nvme_config_discard(ns);
1198 blk_mq_unfreeze_queue(disk->queue);
ac81bfa9 1199}
1673f1f0 1200
ac81bfa9
MB
1201static int nvme_revalidate_disk(struct gendisk *disk)
1202{
1203 struct nvme_ns *ns = disk->private_data;
cdbff4f2
CH
1204 struct nvme_ctrl *ctrl = ns->ctrl;
1205 struct nvme_id_ns *id;
1d5df6af
CH
1206 u8 eui64[8] = { 0 }, nguid[16] = { 0 };
1207 uuid_t uuid = uuid_null;
cdbff4f2 1208 int ret = 0;
ac81bfa9
MB
1209
1210 if (test_bit(NVME_NS_DEAD, &ns->flags)) {
1211 set_capacity(disk, 0);
1212 return -ENODEV;
1213 }
1214
cdbff4f2
CH
1215 id = nvme_identify_ns(ctrl, ns->ns_id);
1216 if (!id)
1217 return -ENODEV;
ac81bfa9 1218
cdbff4f2
CH
1219 if (id->ncap == 0) {
1220 ret = -ENODEV;
1221 goto out;
1222 }
ac81bfa9 1223
1d5df6af
CH
1224 nvme_report_ns_ids(ctrl, ns->ns_id, id, eui64, nguid, &uuid);
1225 if (!uuid_equal(&ns->uuid, &uuid) ||
1226 memcmp(&ns->nguid, &nguid, sizeof(ns->nguid)) ||
1227 memcmp(&ns->eui, &eui64, sizeof(ns->eui))) {
1228 dev_err(ctrl->device,
1229 "identifiers changed for nsid %d\n", ns->ns_id);
1230 ret = -ENODEV;
1231 }
1232
cdbff4f2
CH
1233out:
1234 kfree(id);
1235 return ret;
1673f1f0
CH
1236}
1237
1238static char nvme_pr_type(enum pr_type type)
1239{
1240 switch (type) {
1241 case PR_WRITE_EXCLUSIVE:
1242 return 1;
1243 case PR_EXCLUSIVE_ACCESS:
1244 return 2;
1245 case PR_WRITE_EXCLUSIVE_REG_ONLY:
1246 return 3;
1247 case PR_EXCLUSIVE_ACCESS_REG_ONLY:
1248 return 4;
1249 case PR_WRITE_EXCLUSIVE_ALL_REGS:
1250 return 5;
1251 case PR_EXCLUSIVE_ACCESS_ALL_REGS:
1252 return 6;
1253 default:
1254 return 0;
1255 }
1256};
1257
1258static int nvme_pr_command(struct block_device *bdev, u32 cdw10,
1259 u64 key, u64 sa_key, u8 op)
1260{
1261 struct nvme_ns *ns = bdev->bd_disk->private_data;
1262 struct nvme_command c;
1263 u8 data[16] = { 0, };
1264
1265 put_unaligned_le64(key, &data[0]);
1266 put_unaligned_le64(sa_key, &data[8]);
1267
1268 memset(&c, 0, sizeof(c));
1269 c.common.opcode = op;
1270 c.common.nsid = cpu_to_le32(ns->ns_id);
1271 c.common.cdw10[0] = cpu_to_le32(cdw10);
1272
1273 return nvme_submit_sync_cmd(ns->queue, &c, data, 16);
1274}
1275
1276static int nvme_pr_register(struct block_device *bdev, u64 old,
1277 u64 new, unsigned flags)
1278{
1279 u32 cdw10;
1280
1281 if (flags & ~PR_FL_IGNORE_KEY)
1282 return -EOPNOTSUPP;
1283
1284 cdw10 = old ? 2 : 0;
1285 cdw10 |= (flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0;
1286 cdw10 |= (1 << 30) | (1 << 31); /* PTPL=1 */
1287 return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_register);
1288}
1289
1290static int nvme_pr_reserve(struct block_device *bdev, u64 key,
1291 enum pr_type type, unsigned flags)
1292{
1293 u32 cdw10;
1294
1295 if (flags & ~PR_FL_IGNORE_KEY)
1296 return -EOPNOTSUPP;
1297
1298 cdw10 = nvme_pr_type(type) << 8;
1299 cdw10 |= ((flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0);
1300 return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_acquire);
1301}
1302
1303static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new,
1304 enum pr_type type, bool abort)
1305{
1306 u32 cdw10 = nvme_pr_type(type) << 8 | abort ? 2 : 1;
1307 return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire);
1308}
1309
1310static int nvme_pr_clear(struct block_device *bdev, u64 key)
1311{
8c0b3915 1312 u32 cdw10 = 1 | (key ? 1 << 3 : 0);
1673f1f0
CH
1313 return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_register);
1314}
1315
1316static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
1317{
1318 u32 cdw10 = nvme_pr_type(type) << 8 | key ? 1 << 3 : 0;
1319 return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release);
1320}
1321
1322static const struct pr_ops nvme_pr_ops = {
1323 .pr_register = nvme_pr_register,
1324 .pr_reserve = nvme_pr_reserve,
1325 .pr_release = nvme_pr_release,
1326 .pr_preempt = nvme_pr_preempt,
1327 .pr_clear = nvme_pr_clear,
1328};
1329
a98e58e5 1330#ifdef CONFIG_BLK_SED_OPAL
4f1244c8
CH
1331int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len,
1332 bool send)
a98e58e5 1333{
4f1244c8 1334 struct nvme_ctrl *ctrl = data;
a98e58e5 1335 struct nvme_command cmd;
a98e58e5
SB
1336
1337 memset(&cmd, 0, sizeof(cmd));
1338 if (send)
1339 cmd.common.opcode = nvme_admin_security_send;
1340 else
1341 cmd.common.opcode = nvme_admin_security_recv;
a98e58e5
SB
1342 cmd.common.nsid = 0;
1343 cmd.common.cdw10[0] = cpu_to_le32(((u32)secp) << 24 | ((u32)spsp) << 8);
1344 cmd.common.cdw10[1] = cpu_to_le32(len);
1345
1346 return __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, NULL, buffer, len,
1347 ADMIN_TIMEOUT, NVME_QID_ANY, 1, 0);
1348}
1349EXPORT_SYMBOL_GPL(nvme_sec_submit);
1350#endif /* CONFIG_BLK_SED_OPAL */
1351
5bae7f73 1352static const struct block_device_operations nvme_fops = {
1673f1f0
CH
1353 .owner = THIS_MODULE,
1354 .ioctl = nvme_ioctl,
761f2e1e 1355 .compat_ioctl = nvme_ioctl,
1673f1f0
CH
1356 .open = nvme_open,
1357 .release = nvme_release,
1358 .getgeo = nvme_getgeo,
1359 .revalidate_disk= nvme_revalidate_disk,
1360 .pr_ops = &nvme_pr_ops,
1361};
1362
5fd4ce1b
CH
1363static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled)
1364{
1365 unsigned long timeout =
1366 ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies;
1367 u32 csts, bit = enabled ? NVME_CSTS_RDY : 0;
1368 int ret;
1369
1370 while ((ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts)) == 0) {
0df1e4f5
KB
1371 if (csts == ~0)
1372 return -ENODEV;
5fd4ce1b
CH
1373 if ((csts & NVME_CSTS_RDY) == bit)
1374 break;
1375
1376 msleep(100);
1377 if (fatal_signal_pending(current))
1378 return -EINTR;
1379 if (time_after(jiffies, timeout)) {
1b3c47c1 1380 dev_err(ctrl->device,
5fd4ce1b
CH
1381 "Device not ready; aborting %s\n", enabled ?
1382 "initialisation" : "reset");
1383 return -ENODEV;
1384 }
1385 }
1386
1387 return ret;
1388}
1389
1390/*
1391 * If the device has been passed off to us in an enabled state, just clear
1392 * the enabled bit. The spec says we should set the 'shutdown notification
1393 * bits', but doing so may cause the device to complete commands to the
1394 * admin queue ... and we don't know what memory that might be pointing at!
1395 */
1396int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
1397{
1398 int ret;
1399
1400 ctrl->ctrl_config &= ~NVME_CC_SHN_MASK;
1401 ctrl->ctrl_config &= ~NVME_CC_ENABLE;
1402
1403 ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
1404 if (ret)
1405 return ret;
54adc010 1406
b5a10c5f 1407 if (ctrl->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY)
54adc010
GP
1408 msleep(NVME_QUIRK_DELAY_AMOUNT);
1409
5fd4ce1b
CH
1410 return nvme_wait_ready(ctrl, cap, false);
1411}
576d55d6 1412EXPORT_SYMBOL_GPL(nvme_disable_ctrl);
5fd4ce1b
CH
1413
1414int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
1415{
1416 /*
1417 * Default to a 4K page size, with the intention to update this
1418 * path in the future to accomodate architectures with differing
1419 * kernel and IO page sizes.
1420 */
1421 unsigned dev_page_min = NVME_CAP_MPSMIN(cap) + 12, page_shift = 12;
1422 int ret;
1423
1424 if (page_shift < dev_page_min) {
1b3c47c1 1425 dev_err(ctrl->device,
5fd4ce1b
CH
1426 "Minimum device page size %u too large for host (%u)\n",
1427 1 << dev_page_min, 1 << page_shift);
1428 return -ENODEV;
1429 }
1430
1431 ctrl->page_size = 1 << page_shift;
1432
1433 ctrl->ctrl_config = NVME_CC_CSS_NVM;
1434 ctrl->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT;
60b43f62 1435 ctrl->ctrl_config |= NVME_CC_AMS_RR | NVME_CC_SHN_NONE;
5fd4ce1b
CH
1436 ctrl->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
1437 ctrl->ctrl_config |= NVME_CC_ENABLE;
1438
1439 ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
1440 if (ret)
1441 return ret;
1442 return nvme_wait_ready(ctrl, cap, true);
1443}
576d55d6 1444EXPORT_SYMBOL_GPL(nvme_enable_ctrl);
5fd4ce1b
CH
1445
1446int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl)
1447{
07fbd32a 1448 unsigned long timeout = jiffies + (ctrl->shutdown_timeout * HZ);
5fd4ce1b
CH
1449 u32 csts;
1450 int ret;
1451
1452 ctrl->ctrl_config &= ~NVME_CC_SHN_MASK;
1453 ctrl->ctrl_config |= NVME_CC_SHN_NORMAL;
1454
1455 ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
1456 if (ret)
1457 return ret;
1458
1459 while ((ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts)) == 0) {
1460 if ((csts & NVME_CSTS_SHST_MASK) == NVME_CSTS_SHST_CMPLT)
1461 break;
1462
1463 msleep(100);
1464 if (fatal_signal_pending(current))
1465 return -EINTR;
1466 if (time_after(jiffies, timeout)) {
1b3c47c1 1467 dev_err(ctrl->device,
5fd4ce1b
CH
1468 "Device shutdown incomplete; abort shutdown\n");
1469 return -ENODEV;
1470 }
1471 }
1472
1473 return ret;
1474}
576d55d6 1475EXPORT_SYMBOL_GPL(nvme_shutdown_ctrl);
5fd4ce1b 1476
da35825d
CH
1477static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
1478 struct request_queue *q)
1479{
7c88cb00
JA
1480 bool vwc = false;
1481
da35825d 1482 if (ctrl->max_hw_sectors) {
45686b61
CH
1483 u32 max_segments =
1484 (ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1;
1485
da35825d 1486 blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors);
45686b61 1487 blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
da35825d 1488 }
e6282aef
KB
1489 if (ctrl->quirks & NVME_QUIRK_STRIPE_SIZE)
1490 blk_queue_chunk_sectors(q, ctrl->max_hw_sectors);
da35825d 1491 blk_queue_virt_boundary(q, ctrl->page_size - 1);
7c88cb00
JA
1492 if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
1493 vwc = true;
1494 blk_queue_write_cache(q, vwc, vwc);
da35825d
CH
1495}
1496
dbf86b39
JD
1497static int nvme_configure_timestamp(struct nvme_ctrl *ctrl)
1498{
1499 __le64 ts;
1500 int ret;
1501
1502 if (!(ctrl->oncs & NVME_CTRL_ONCS_TIMESTAMP))
1503 return 0;
1504
1505 ts = cpu_to_le64(ktime_to_ms(ktime_get_real()));
1506 ret = nvme_set_features(ctrl, NVME_FEAT_TIMESTAMP, 0, &ts, sizeof(ts),
1507 NULL);
1508 if (ret)
1509 dev_warn_once(ctrl->device,
1510 "could not set timestamp (%d)\n", ret);
1511 return ret;
1512}
1513
634b8325 1514static int nvme_configure_apst(struct nvme_ctrl *ctrl)
c5552fde
AL
1515{
1516 /*
1517 * APST (Autonomous Power State Transition) lets us program a
1518 * table of power state transitions that the controller will
1519 * perform automatically. We configure it with a simple
1520 * heuristic: we are willing to spend at most 2% of the time
1521 * transitioning between power states. Therefore, when running
1522 * in any given state, we will enter the next lower-power
76e4ad09 1523 * non-operational state after waiting 50 * (enlat + exlat)
da87591b 1524 * microseconds, as long as that state's exit latency is under
c5552fde
AL
1525 * the requested maximum latency.
1526 *
1527 * We will not autonomously enter any non-operational state for
1528 * which the total latency exceeds ps_max_latency_us. Users
1529 * can set ps_max_latency_us to zero to turn off APST.
1530 */
1531
1532 unsigned apste;
1533 struct nvme_feat_auto_pst *table;
fb0dc399
AL
1534 u64 max_lat_us = 0;
1535 int max_ps = -1;
c5552fde
AL
1536 int ret;
1537
1538 /*
1539 * If APST isn't supported or if we haven't been initialized yet,
1540 * then don't do anything.
1541 */
1542 if (!ctrl->apsta)
634b8325 1543 return 0;
c5552fde
AL
1544
1545 if (ctrl->npss > 31) {
1546 dev_warn(ctrl->device, "NPSS is invalid; not using APST\n");
634b8325 1547 return 0;
c5552fde
AL
1548 }
1549
1550 table = kzalloc(sizeof(*table), GFP_KERNEL);
1551 if (!table)
634b8325 1552 return 0;
c5552fde 1553
76a5af84 1554 if (!ctrl->apst_enabled || ctrl->ps_max_latency_us == 0) {
c5552fde
AL
1555 /* Turn off APST. */
1556 apste = 0;
fb0dc399 1557 dev_dbg(ctrl->device, "APST disabled\n");
c5552fde
AL
1558 } else {
1559 __le64 target = cpu_to_le64(0);
1560 int state;
1561
1562 /*
1563 * Walk through all states from lowest- to highest-power.
1564 * According to the spec, lower-numbered states use more
1565 * power. NPSS, despite the name, is the index of the
1566 * lowest-power state, not the number of states.
1567 */
1568 for (state = (int)ctrl->npss; state >= 0; state--) {
da87591b 1569 u64 total_latency_us, exit_latency_us, transition_ms;
c5552fde
AL
1570
1571 if (target)
1572 table->entries[state] = target;
1573
ff5350a8
AL
1574 /*
1575 * Don't allow transitions to the deepest state
1576 * if it's quirked off.
1577 */
1578 if (state == ctrl->npss &&
1579 (ctrl->quirks & NVME_QUIRK_NO_DEEPEST_PS))
1580 continue;
1581
c5552fde
AL
1582 /*
1583 * Is this state a useful non-operational state for
1584 * higher-power states to autonomously transition to?
1585 */
1586 if (!(ctrl->psd[state].flags &
1587 NVME_PS_FLAGS_NON_OP_STATE))
1588 continue;
1589
da87591b
KHF
1590 exit_latency_us =
1591 (u64)le32_to_cpu(ctrl->psd[state].exit_lat);
1592 if (exit_latency_us > ctrl->ps_max_latency_us)
c5552fde
AL
1593 continue;
1594
da87591b
KHF
1595 total_latency_us =
1596 exit_latency_us +
1597 le32_to_cpu(ctrl->psd[state].entry_lat);
1598
c5552fde
AL
1599 /*
1600 * This state is good. Use it as the APST idle
1601 * target for higher power states.
1602 */
1603 transition_ms = total_latency_us + 19;
1604 do_div(transition_ms, 20);
1605 if (transition_ms > (1 << 24) - 1)
1606 transition_ms = (1 << 24) - 1;
1607
1608 target = cpu_to_le64((state << 3) |
1609 (transition_ms << 8));
fb0dc399
AL
1610
1611 if (max_ps == -1)
1612 max_ps = state;
1613
1614 if (total_latency_us > max_lat_us)
1615 max_lat_us = total_latency_us;
c5552fde
AL
1616 }
1617
1618 apste = 1;
fb0dc399
AL
1619
1620 if (max_ps == -1) {
1621 dev_dbg(ctrl->device, "APST enabled but no non-operational states are available\n");
1622 } else {
1623 dev_dbg(ctrl->device, "APST enabled: max PS = %d, max round-trip latency = %lluus, table = %*phN\n",
1624 max_ps, max_lat_us, (int)sizeof(*table), table);
1625 }
c5552fde
AL
1626 }
1627
1628 ret = nvme_set_features(ctrl, NVME_FEAT_AUTO_PST, apste,
1629 table, sizeof(*table), NULL);
1630 if (ret)
1631 dev_err(ctrl->device, "failed to set APST feature (%d)\n", ret);
1632
1633 kfree(table);
634b8325 1634 return ret;
c5552fde
AL
1635}
1636
1637static void nvme_set_latency_tolerance(struct device *dev, s32 val)
1638{
1639 struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
1640 u64 latency;
1641
1642 switch (val) {
1643 case PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT:
1644 case PM_QOS_LATENCY_ANY:
1645 latency = U64_MAX;
1646 break;
1647
1648 default:
1649 latency = val;
1650 }
1651
1652 if (ctrl->ps_max_latency_us != latency) {
1653 ctrl->ps_max_latency_us = latency;
1654 nvme_configure_apst(ctrl);
1655 }
1656}
1657
bd4da3ab
AL
1658struct nvme_core_quirk_entry {
1659 /*
1660 * NVMe model and firmware strings are padded with spaces. For
1661 * simplicity, strings in the quirk table are padded with NULLs
1662 * instead.
1663 */
1664 u16 vid;
1665 const char *mn;
1666 const char *fr;
1667 unsigned long quirks;
1668};
1669
1670static const struct nvme_core_quirk_entry core_quirks[] = {
c5552fde 1671 {
be56945c
AL
1672 /*
1673 * This Toshiba device seems to die using any APST states. See:
1674 * https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1678184/comments/11
1675 */
1676 .vid = 0x1179,
1677 .mn = "THNSF5256GPUK TOSHIBA",
c5552fde 1678 .quirks = NVME_QUIRK_NO_APST,
be56945c 1679 }
bd4da3ab
AL
1680};
1681
/*
 * Compare a space-padded identify string against a NUL-terminated pattern.
 *
 * @idstr: fixed-width, space-padded field of @len bytes (not terminated)
 * @match: NUL-terminated pattern, or NULL to match anything
 * @len:   size of @idstr in bytes
 *
 * Returns true if @idstr equals @match followed only by space padding.
 * A pattern longer than the field can never match; that case triggers a
 * one-time warning and returns false rather than reading beyond @idstr.
 */
static bool string_matches(const char *idstr, const char *match, size_t len)
{
	size_t matchlen;

	if (!match)
		return true;

	matchlen = strlen(match);
	if (WARN_ON_ONCE(matchlen > len))
		return false;

	if (memcmp(idstr, match, matchlen))
		return false;

	/* Everything after the pattern must be padding. */
	for (; matchlen < len; matchlen++)
		if (idstr[matchlen] != ' ')
			return false;

	return true;
}
1702
1703static bool quirk_matches(const struct nvme_id_ctrl *id,
1704 const struct nvme_core_quirk_entry *q)
1705{
1706 return q->vid == le16_to_cpu(id->vid) &&
1707 string_matches(id->mn, q->mn, sizeof(id->mn)) &&
1708 string_matches(id->fr, q->fr, sizeof(id->fr));
1709}
1710
180de007
CH
1711static void nvme_init_subnqn(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
1712{
1713 size_t nqnlen;
1714 int off;
1715
1716 nqnlen = strnlen(id->subnqn, NVMF_NQN_SIZE);
1717 if (nqnlen > 0 && nqnlen < NVMF_NQN_SIZE) {
1718 strcpy(ctrl->subnqn, id->subnqn);
1719 return;
1720 }
1721
1722 if (ctrl->vs >= NVME_VS(1, 2, 1))
1723 dev_warn(ctrl->device, "missing or invalid SUBNQN field.\n");
1724
1725 /* Generate a "fake" NQN per Figure 254 in NVMe 1.3 + ECN 001 */
1726 off = snprintf(ctrl->subnqn, NVMF_NQN_SIZE,
1727 "nqn.2014.08.org.nvmexpress:%4x%4x",
1728 le16_to_cpu(id->vid), le16_to_cpu(id->ssvid));
1729 memcpy(ctrl->subnqn + off, id->sn, sizeof(id->sn));
1730 off += sizeof(id->sn);
1731 memcpy(ctrl->subnqn + off, id->mn, sizeof(id->mn));
1732 off += sizeof(id->mn);
1733 memset(ctrl->subnqn + off, 0, sizeof(ctrl->subnqn) - off);
1734}
1735
7fd8930f
CH
1736/*
1737 * Initialize the cached copies of the Identify data and various controller
1738 * register in our nvme_ctrl structure. This should be called as soon as
1739 * the admin queue is fully up and running.
1740 */
1741int nvme_init_identify(struct nvme_ctrl *ctrl)
1742{
1743 struct nvme_id_ctrl *id;
1744 u64 cap;
1745 int ret, page_shift;
a229dbf6 1746 u32 max_hw_sectors;
76a5af84 1747 bool prev_apst_enabled;
7fd8930f 1748
f3ca80fc
CH
1749 ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs);
1750 if (ret) {
1b3c47c1 1751 dev_err(ctrl->device, "Reading VS failed (%d)\n", ret);
f3ca80fc
CH
1752 return ret;
1753 }
1754
7fd8930f
CH
1755 ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &cap);
1756 if (ret) {
1b3c47c1 1757 dev_err(ctrl->device, "Reading CAP failed (%d)\n", ret);
7fd8930f
CH
1758 return ret;
1759 }
1760 page_shift = NVME_CAP_MPSMIN(cap) + 12;
1761
8ef2074d 1762 if (ctrl->vs >= NVME_VS(1, 1, 0))
f3ca80fc
CH
1763 ctrl->subsystem = NVME_CAP_NSSRC(cap);
1764
7fd8930f
CH
1765 ret = nvme_identify_ctrl(ctrl, &id);
1766 if (ret) {
1b3c47c1 1767 dev_err(ctrl->device, "Identify Controller failed (%d)\n", ret);
7fd8930f
CH
1768 return -EIO;
1769 }
1770
180de007
CH
1771 nvme_init_subnqn(ctrl, id);
1772
bd4da3ab
AL
1773 if (!ctrl->identified) {
1774 /*
1775 * Check for quirks. Quirk can depend on firmware version,
1776 * so, in principle, the set of quirks present can change
1777 * across a reset. As a possible future enhancement, we
1778 * could re-scan for quirks every time we reinitialize
1779 * the device, but we'd have to make sure that the driver
1780 * behaves intelligently if the quirks change.
1781 */
1782
1783 int i;
1784
1785 for (i = 0; i < ARRAY_SIZE(core_quirks); i++) {
1786 if (quirk_matches(id, &core_quirks[i]))
1787 ctrl->quirks |= core_quirks[i].quirks;
1788 }
1789 }
1790
c35e30b4 1791 if (force_apst && (ctrl->quirks & NVME_QUIRK_NO_DEEPEST_PS)) {
f0425db0 1792 dev_warn(ctrl->device, "forcibly allowing all power states due to nvme_core.force_apst -- use at your own risk\n");
c35e30b4
AL
1793 ctrl->quirks &= ~NVME_QUIRK_NO_DEEPEST_PS;
1794 }
1795
8a9ae523 1796 ctrl->oacs = le16_to_cpu(id->oacs);
118472ab 1797 ctrl->vid = le16_to_cpu(id->vid);
7fd8930f 1798 ctrl->oncs = le16_to_cpup(&id->oncs);
6bf25d16 1799 atomic_set(&ctrl->abort_limit, id->acl + 1);
7fd8930f 1800 ctrl->vwc = id->vwc;
931e1c22 1801 ctrl->cntlid = le16_to_cpup(&id->cntlid);
7fd8930f
CH
1802 memcpy(ctrl->serial, id->sn, sizeof(id->sn));
1803 memcpy(ctrl->model, id->mn, sizeof(id->mn));
1804 memcpy(ctrl->firmware_rev, id->fr, sizeof(id->fr));
1805 if (id->mdts)
a229dbf6 1806 max_hw_sectors = 1 << (id->mdts + page_shift - 9);
7fd8930f 1807 else
a229dbf6
CH
1808 max_hw_sectors = UINT_MAX;
1809 ctrl->max_hw_sectors =
1810 min_not_zero(ctrl->max_hw_sectors, max_hw_sectors);
7fd8930f 1811
da35825d 1812 nvme_set_queue_limits(ctrl, ctrl->admin_q);
07bfcd09 1813 ctrl->sgls = le32_to_cpu(id->sgls);
038bd4cb 1814 ctrl->kas = le16_to_cpu(id->kas);
07bfcd09 1815
07fbd32a
MP
1816 if (id->rtd3e) {
1817 /* us -> s */
1818 u32 transition_time = le32_to_cpu(id->rtd3e) / 1000000;
1819
1820 ctrl->shutdown_timeout = clamp_t(unsigned int, transition_time,
1821 shutdown_timeout, 60);
1822
1823 if (ctrl->shutdown_timeout != shutdown_timeout)
1824 dev_warn(ctrl->device,
1825 "Shutdown timeout set to %u seconds\n",
1826 ctrl->shutdown_timeout);
1827 } else
1828 ctrl->shutdown_timeout = shutdown_timeout;
1829
c5552fde 1830 ctrl->npss = id->npss;
76a5af84
KHF
1831 ctrl->apsta = id->apsta;
1832 prev_apst_enabled = ctrl->apst_enabled;
c35e30b4
AL
1833 if (ctrl->quirks & NVME_QUIRK_NO_APST) {
1834 if (force_apst && id->apsta) {
f0425db0 1835 dev_warn(ctrl->device, "forcibly allowing APST due to nvme_core.force_apst -- use at your own risk\n");
76a5af84 1836 ctrl->apst_enabled = true;
c35e30b4 1837 } else {
76a5af84 1838 ctrl->apst_enabled = false;
c35e30b4
AL
1839 }
1840 } else {
76a5af84 1841 ctrl->apst_enabled = id->apsta;
c35e30b4 1842 }
c5552fde
AL
1843 memcpy(ctrl->psd, id->psd, sizeof(ctrl->psd));
1844
d3d5b87d 1845 if (ctrl->ops->flags & NVME_F_FABRICS) {
07bfcd09
CH
1846 ctrl->icdoff = le16_to_cpu(id->icdoff);
1847 ctrl->ioccsz = le32_to_cpu(id->ioccsz);
1848 ctrl->iorcsz = le32_to_cpu(id->iorcsz);
1849 ctrl->maxcmd = le16_to_cpu(id->maxcmd);
1850
1851 /*
1852 * In fabrics we need to verify the cntlid matches the
1853 * admin connect
1854 */
634b8325 1855 if (ctrl->cntlid != le16_to_cpu(id->cntlid)) {
07bfcd09 1856 ret = -EINVAL;
634b8325
KB
1857 goto out_free;
1858 }
038bd4cb
SG
1859
1860 if (!ctrl->opts->discovery_nqn && !ctrl->kas) {
f0425db0 1861 dev_err(ctrl->device,
038bd4cb
SG
1862 "keep-alive support is mandatory for fabrics\n");
1863 ret = -EINVAL;
634b8325 1864 goto out_free;
038bd4cb 1865 }
07bfcd09
CH
1866 } else {
1867 ctrl->cntlid = le16_to_cpu(id->cntlid);
fe6d53c9
CH
1868 ctrl->hmpre = le32_to_cpu(id->hmpre);
1869 ctrl->hmmin = le32_to_cpu(id->hmmin);
044a9df1
CH
1870 ctrl->hmminds = le32_to_cpu(id->hmminds);
1871 ctrl->hmmaxd = le16_to_cpu(id->hmmaxd);
07bfcd09 1872 }
da35825d 1873
7fd8930f 1874 kfree(id);
bd4da3ab 1875
76a5af84 1876 if (ctrl->apst_enabled && !prev_apst_enabled)
c5552fde 1877 dev_pm_qos_expose_latency_tolerance(ctrl->device);
76a5af84 1878 else if (!ctrl->apst_enabled && prev_apst_enabled)
c5552fde
AL
1879 dev_pm_qos_hide_latency_tolerance(ctrl->device);
1880
634b8325
KB
1881 ret = nvme_configure_apst(ctrl);
1882 if (ret < 0)
1883 return ret;
dbf86b39
JD
1884
1885 ret = nvme_configure_timestamp(ctrl);
1886 if (ret < 0)
1887 return ret;
634b8325
KB
1888
1889 ret = nvme_configure_directives(ctrl);
1890 if (ret < 0)
1891 return ret;
c5552fde 1892
bd4da3ab 1893 ctrl->identified = true;
c5552fde 1894
634b8325
KB
1895 return 0;
1896
1897out_free:
1898 kfree(id);
07bfcd09 1899 return ret;
7fd8930f 1900}
576d55d6 1901EXPORT_SYMBOL_GPL(nvme_init_identify);
7fd8930f 1902
f3ca80fc 1903static int nvme_dev_open(struct inode *inode, struct file *file)
1673f1f0 1904{
f3ca80fc
CH
1905 struct nvme_ctrl *ctrl;
1906 int instance = iminor(inode);
1907 int ret = -ENODEV;
1673f1f0 1908
f3ca80fc
CH
1909 spin_lock(&dev_list_lock);
1910 list_for_each_entry(ctrl, &nvme_ctrl_list, node) {
1911 if (ctrl->instance != instance)
1912 continue;
1913
1914 if (!ctrl->admin_q) {
1915 ret = -EWOULDBLOCK;
1916 break;
1917 }
d22524a4 1918 if (!kobject_get_unless_zero(&ctrl->device->kobj))
f3ca80fc
CH
1919 break;
1920 file->private_data = ctrl;
1921 ret = 0;
1922 break;
1923 }
1924 spin_unlock(&dev_list_lock);
1925
1926 return ret;
1673f1f0
CH
1927}
1928
f3ca80fc 1929static int nvme_dev_release(struct inode *inode, struct file *file)
1673f1f0 1930{
f3ca80fc
CH
1931 nvme_put_ctrl(file->private_data);
1932 return 0;
1933}
1934
bfd89471
CH
1935static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
1936{
1937 struct nvme_ns *ns;
1938 int ret;
1939
1940 mutex_lock(&ctrl->namespaces_mutex);
1941 if (list_empty(&ctrl->namespaces)) {
1942 ret = -ENOTTY;
1943 goto out_unlock;
1944 }
1945
1946 ns = list_first_entry(&ctrl->namespaces, struct nvme_ns, list);
1947 if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) {
1b3c47c1 1948 dev_warn(ctrl->device,
bfd89471
CH
1949 "NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n");
1950 ret = -EINVAL;
1951 goto out_unlock;
1952 }
1953
1b3c47c1 1954 dev_warn(ctrl->device,
bfd89471
CH
1955 "using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n");
1956 kref_get(&ns->kref);
1957 mutex_unlock(&ctrl->namespaces_mutex);
1958
1959 ret = nvme_user_cmd(ctrl, ns, argp);
1960 nvme_put_ns(ns);
1961 return ret;
1962
1963out_unlock:
1964 mutex_unlock(&ctrl->namespaces_mutex);
1965 return ret;
1966}
1967
f3ca80fc
CH
1968static long nvme_dev_ioctl(struct file *file, unsigned int cmd,
1969 unsigned long arg)
1970{
1971 struct nvme_ctrl *ctrl = file->private_data;
1972 void __user *argp = (void __user *)arg;
f3ca80fc
CH
1973
1974 switch (cmd) {
1975 case NVME_IOCTL_ADMIN_CMD:
1976 return nvme_user_cmd(ctrl, NULL, argp);
1977 case NVME_IOCTL_IO_CMD:
bfd89471 1978 return nvme_dev_user_cmd(ctrl, argp);
f3ca80fc 1979 case NVME_IOCTL_RESET:
1b3c47c1 1980 dev_warn(ctrl->device, "resetting controller\n");
d86c4d8e 1981 return nvme_reset_ctrl_sync(ctrl);
f3ca80fc
CH
1982 case NVME_IOCTL_SUBSYS_RESET:
1983 return nvme_reset_subsystem(ctrl);
9ec3bb2f
KB
1984 case NVME_IOCTL_RESCAN:
1985 nvme_queue_scan(ctrl);
1986 return 0;
f3ca80fc
CH
1987 default:
1988 return -ENOTTY;
1989 }
1990}
1991
1992static const struct file_operations nvme_dev_fops = {
1993 .owner = THIS_MODULE,
1994 .open = nvme_dev_open,
1995 .release = nvme_dev_release,
1996 .unlocked_ioctl = nvme_dev_ioctl,
1997 .compat_ioctl = nvme_dev_ioctl,
1998};
1999
2000static ssize_t nvme_sysfs_reset(struct device *dev,
2001 struct device_attribute *attr, const char *buf,
2002 size_t count)
2003{
2004 struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
2005 int ret;
2006
d86c4d8e 2007 ret = nvme_reset_ctrl_sync(ctrl);
f3ca80fc
CH
2008 if (ret < 0)
2009 return ret;
2010 return count;
1673f1f0 2011}
f3ca80fc 2012static DEVICE_ATTR(reset_controller, S_IWUSR, NULL, nvme_sysfs_reset);
1673f1f0 2013
9ec3bb2f
KB
2014static ssize_t nvme_sysfs_rescan(struct device *dev,
2015 struct device_attribute *attr, const char *buf,
2016 size_t count)
2017{
2018 struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
2019
2020 nvme_queue_scan(ctrl);
2021 return count;
2022}
2023static DEVICE_ATTR(rescan_controller, S_IWUSR, NULL, nvme_sysfs_rescan);
2024
118472ab
KB
2025static ssize_t wwid_show(struct device *dev, struct device_attribute *attr,
2026 char *buf)
2027{
40267efd 2028 struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
118472ab
KB
2029 struct nvme_ctrl *ctrl = ns->ctrl;
2030 int serial_len = sizeof(ctrl->serial);
2031 int model_len = sizeof(ctrl->model);
2032
6484f5d1
JT
2033 if (!uuid_is_null(&ns->uuid))
2034 return sprintf(buf, "uuid.%pU\n", &ns->uuid);
2035
90985b84
JT
2036 if (memchr_inv(ns->nguid, 0, sizeof(ns->nguid)))
2037 return sprintf(buf, "eui.%16phN\n", ns->nguid);
118472ab
KB
2038
2039 if (memchr_inv(ns->eui, 0, sizeof(ns->eui)))
2040 return sprintf(buf, "eui.%8phN\n", ns->eui);
2041
758f3735
MW
2042 while (serial_len > 0 && (ctrl->serial[serial_len - 1] == ' ' ||
2043 ctrl->serial[serial_len - 1] == '\0'))
118472ab 2044 serial_len--;
758f3735
MW
2045 while (model_len > 0 && (ctrl->model[model_len - 1] == ' ' ||
2046 ctrl->model[model_len - 1] == '\0'))
118472ab
KB
2047 model_len--;
2048
2049 return sprintf(buf, "nvme.%04x-%*phN-%*phN-%08x\n", ctrl->vid,
2050 serial_len, ctrl->serial, model_len, ctrl->model, ns->ns_id);
2051}
2052static DEVICE_ATTR(wwid, S_IRUGO, wwid_show, NULL);
2053
d934f984
JT
2054static ssize_t nguid_show(struct device *dev, struct device_attribute *attr,
2055 char *buf)
2056{
2057 struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
2058 return sprintf(buf, "%pU\n", ns->nguid);
2059}
2060static DEVICE_ATTR(nguid, S_IRUGO, nguid_show, NULL);
2061
2b9b6e86
KB
2062static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
2063 char *buf)
2064{
40267efd 2065 struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
d934f984
JT
2066
2067 /* For backward compatibility expose the NGUID to userspace if
2068 * we have no UUID set
2069 */
2070 if (uuid_is_null(&ns->uuid)) {
2071 printk_ratelimited(KERN_WARNING
2072 "No UUID available providing old NGUID\n");
2073 return sprintf(buf, "%pU\n", ns->nguid);
2074 }
2075 return sprintf(buf, "%pU\n", &ns->uuid);
2b9b6e86
KB
2076}
2077static DEVICE_ATTR(uuid, S_IRUGO, uuid_show, NULL);
2078
2079static ssize_t eui_show(struct device *dev, struct device_attribute *attr,
2080 char *buf)
2081{
40267efd 2082 struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
2b9b6e86
KB
2083 return sprintf(buf, "%8phd\n", ns->eui);
2084}
2085static DEVICE_ATTR(eui, S_IRUGO, eui_show, NULL);
2086
2087static ssize_t nsid_show(struct device *dev, struct device_attribute *attr,
2088 char *buf)
2089{
40267efd 2090 struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
2b9b6e86
KB
2091 return sprintf(buf, "%d\n", ns->ns_id);
2092}
2093static DEVICE_ATTR(nsid, S_IRUGO, nsid_show, NULL);
2094
2095static struct attribute *nvme_ns_attrs[] = {
118472ab 2096 &dev_attr_wwid.attr,
2b9b6e86 2097 &dev_attr_uuid.attr,
d934f984 2098 &dev_attr_nguid.attr,
2b9b6e86
KB
2099 &dev_attr_eui.attr,
2100 &dev_attr_nsid.attr,
2101 NULL,
2102};
2103
1a353d85 2104static umode_t nvme_ns_attrs_are_visible(struct kobject *kobj,
2b9b6e86
KB
2105 struct attribute *a, int n)
2106{
2107 struct device *dev = container_of(kobj, struct device, kobj);
40267efd 2108 struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
2b9b6e86
KB
2109
2110 if (a == &dev_attr_uuid.attr) {
d934f984
JT
2111 if (uuid_is_null(&ns->uuid) ||
2112 !memchr_inv(ns->nguid, 0, sizeof(ns->nguid)))
2113 return 0;
2114 }
2115 if (a == &dev_attr_nguid.attr) {
90985b84 2116 if (!memchr_inv(ns->nguid, 0, sizeof(ns->nguid)))
2b9b6e86
KB
2117 return 0;
2118 }
2119 if (a == &dev_attr_eui.attr) {
2120 if (!memchr_inv(ns->eui, 0, sizeof(ns->eui)))
2121 return 0;
2122 }
2123 return a->mode;
2124}
2125
2126static const struct attribute_group nvme_ns_attr_group = {
2127 .attrs = nvme_ns_attrs,
1a353d85 2128 .is_visible = nvme_ns_attrs_are_visible,
2b9b6e86
KB
2129};
2130
/*
 * Generate a read-only sysfs attribute named @field that prints the
 * fixed-size character array ctrl->field. The precision bounds the output
 * to the array size, so the array need not be NUL terminated.
 */
#define nvme_show_str_function(field)						\
static ssize_t  field##_show(struct device *dev,				\
			    struct device_attribute *attr, char *buf)		\
{										\
	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);				\
										\
	return sprintf(buf, "%.*s\n", (int)sizeof(ctrl->field), ctrl->field);	\
}										\
static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL);
2139
931e1c22
ML
/*
 * Generate a read-only sysfs attribute named @field that prints the
 * integer ctrl->field in decimal.
 */
#define nvme_show_int_function(field)						\
static ssize_t  field##_show(struct device *dev,				\
			    struct device_attribute *attr, char *buf)		\
{										\
	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);				\
										\
	return sprintf(buf, "%d\n", ctrl->field);				\
}										\
static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL);
2148
/* Controller attributes generated from the helper macros above. */
nvme_show_str_function(model);
nvme_show_str_function(serial);
nvme_show_str_function(firmware_rev);
nvme_show_int_function(cntlid);
779ff756 2153
1a353d85
ML
2154static ssize_t nvme_sysfs_delete(struct device *dev,
2155 struct device_attribute *attr, const char *buf,
2156 size_t count)
2157{
2158 struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
2159
2160 if (device_remove_file_self(dev, attr))
2161 ctrl->ops->delete_ctrl(ctrl);
2162 return count;
2163}
2164static DEVICE_ATTR(delete_controller, S_IWUSR, NULL, nvme_sysfs_delete);
2165
2166static ssize_t nvme_sysfs_show_transport(struct device *dev,
2167 struct device_attribute *attr,
2168 char *buf)
2169{
2170 struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
2171
2172 return snprintf(buf, PAGE_SIZE, "%s\n", ctrl->ops->name);
2173}
2174static DEVICE_ATTR(transport, S_IRUGO, nvme_sysfs_show_transport, NULL);
2175
8432bdb2
SG
2176static ssize_t nvme_sysfs_show_state(struct device *dev,
2177 struct device_attribute *attr,
2178 char *buf)
2179{
2180 struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
2181 static const char *const state_name[] = {
2182 [NVME_CTRL_NEW] = "new",
2183 [NVME_CTRL_LIVE] = "live",
2184 [NVME_CTRL_RESETTING] = "resetting",
2185 [NVME_CTRL_RECONNECTING]= "reconnecting",
2186 [NVME_CTRL_DELETING] = "deleting",
2187 [NVME_CTRL_DEAD] = "dead",
2188 };
2189
2190 if ((unsigned)ctrl->state < ARRAY_SIZE(state_name) &&
2191 state_name[ctrl->state])
2192 return sprintf(buf, "%s\n", state_name[ctrl->state]);
2193
2194 return sprintf(buf, "unknown state\n");
2195}
2196
2197static DEVICE_ATTR(state, S_IRUGO, nvme_sysfs_show_state, NULL);
2198
1a353d85
ML
2199static ssize_t nvme_sysfs_show_subsysnqn(struct device *dev,
2200 struct device_attribute *attr,
2201 char *buf)
2202{
2203 struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
2204
180de007 2205 return snprintf(buf, PAGE_SIZE, "%s\n", ctrl->subnqn);
1a353d85
ML
2206}
2207static DEVICE_ATTR(subsysnqn, S_IRUGO, nvme_sysfs_show_subsysnqn, NULL);
2208
2209static ssize_t nvme_sysfs_show_address(struct device *dev,
2210 struct device_attribute *attr,
2211 char *buf)
2212{
2213 struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
2214
2215 return ctrl->ops->get_address(ctrl, buf, PAGE_SIZE);
2216}
2217static DEVICE_ATTR(address, S_IRUGO, nvme_sysfs_show_address, NULL);
2218
779ff756
KB
2219static struct attribute *nvme_dev_attrs[] = {
2220 &dev_attr_reset_controller.attr,
9ec3bb2f 2221 &dev_attr_rescan_controller.attr,
779ff756
KB
2222 &dev_attr_model.attr,
2223 &dev_attr_serial.attr,
2224 &dev_attr_firmware_rev.attr,
931e1c22 2225 &dev_attr_cntlid.attr,
1a353d85
ML
2226 &dev_attr_delete_controller.attr,
2227 &dev_attr_transport.attr,
2228 &dev_attr_subsysnqn.attr,
2229 &dev_attr_address.attr,
8432bdb2 2230 &dev_attr_state.attr,
779ff756
KB
2231 NULL
2232};
2233
1a353d85
ML
2234static umode_t nvme_dev_attrs_are_visible(struct kobject *kobj,
2235 struct attribute *a, int n)
2236{
2237 struct device *dev = container_of(kobj, struct device, kobj);
2238 struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
2239
49d3d50b
CH
2240 if (a == &dev_attr_delete_controller.attr && !ctrl->ops->delete_ctrl)
2241 return 0;
2242 if (a == &dev_attr_address.attr && !ctrl->ops->get_address)
2243 return 0;
1a353d85
ML
2244
2245 return a->mode;
2246}
2247
779ff756 2248static struct attribute_group nvme_dev_attrs_group = {
1a353d85
ML
2249 .attrs = nvme_dev_attrs,
2250 .is_visible = nvme_dev_attrs_are_visible,
779ff756
KB
2251};
2252
2253static const struct attribute_group *nvme_dev_attr_groups[] = {
2254 &nvme_dev_attrs_group,
2255 NULL,
2256};
2257
5bae7f73
CH
2258static int ns_cmp(void *priv, struct list_head *a, struct list_head *b)
2259{
2260 struct nvme_ns *nsa = container_of(a, struct nvme_ns, list);
2261 struct nvme_ns *nsb = container_of(b, struct nvme_ns, list);
2262
2263 return nsa->ns_id - nsb->ns_id;
2264}
2265
32f0c4af 2266static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
5bae7f73 2267{
32f0c4af 2268 struct nvme_ns *ns, *ret = NULL;
69d3b8ac 2269
32f0c4af 2270 mutex_lock(&ctrl->namespaces_mutex);
5bae7f73 2271 list_for_each_entry(ns, &ctrl->namespaces, list) {
32f0c4af 2272 if (ns->ns_id == nsid) {
2dd41228
CH
2273 if (!kref_get_unless_zero(&ns->kref))
2274 continue;
32f0c4af
KB
2275 ret = ns;
2276 break;
2277 }
5bae7f73
CH
2278 if (ns->ns_id > nsid)
2279 break;
2280 }
32f0c4af
KB
2281 mutex_unlock(&ctrl->namespaces_mutex);
2282 return ret;
5bae7f73
CH
2283}
2284
f5d11840
JA
2285static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns)
2286{
2287 struct streams_directive_params s;
2288 int ret;
2289
2290 if (!ctrl->nr_streams)
2291 return 0;
2292
2293 ret = nvme_get_stream_params(ctrl, &s, ns->ns_id);
2294 if (ret)
2295 return ret;
2296
2297 ns->sws = le32_to_cpu(s.sws);
2298 ns->sgs = le16_to_cpu(s.sgs);
2299
2300 if (ns->sws) {
2301 unsigned int bs = 1 << ns->lba_shift;
2302
2303 blk_queue_io_min(ns->queue, bs * ns->sws);
2304 if (ns->sgs)
2305 blk_queue_io_opt(ns->queue, bs * ns->sws * ns->sgs);
2306 }
2307
2308 return 0;
2309}
2310
5bae7f73
CH
2311static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
2312{
2313 struct nvme_ns *ns;
2314 struct gendisk *disk;
ac81bfa9
MB
2315 struct nvme_id_ns *id;
2316 char disk_name[DISK_NAME_LEN];
5bae7f73
CH
2317 int node = dev_to_node(ctrl->dev);
2318
2319 ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node);
2320 if (!ns)
2321 return;
2322
075790eb
KB
2323 ns->instance = ida_simple_get(&ctrl->ns_ida, 1, 0, GFP_KERNEL);
2324 if (ns->instance < 0)
2325 goto out_free_ns;
2326
5bae7f73
CH
2327 ns->queue = blk_mq_init_queue(ctrl->tagset);
2328 if (IS_ERR(ns->queue))
075790eb 2329 goto out_release_instance;
5bae7f73
CH
2330 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue);
2331 ns->queue->queuedata = ns;
2332 ns->ctrl = ctrl;
2333
5bae7f73
CH
2334 kref_init(&ns->kref);
2335 ns->ns_id = nsid;
5bae7f73 2336 ns->lba_shift = 9; /* set to a default value for 512 until disk is validated */
5bae7f73
CH
2337
2338 blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
da35825d 2339 nvme_set_queue_limits(ctrl, ns->queue);
f5d11840 2340 nvme_setup_streams_ns(ctrl, ns);
5bae7f73 2341
ac81bfa9 2342 sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->instance);
5bae7f73 2343
cdbff4f2
CH
2344 id = nvme_identify_ns(ctrl, nsid);
2345 if (!id)
ac81bfa9
MB
2346 goto out_free_queue;
2347
cdbff4f2
CH
2348 if (id->ncap == 0)
2349 goto out_free_id;
2350
2351 nvme_report_ns_ids(ctrl, ns->ns_id, id, ns->eui, ns->nguid, &ns->uuid);
2352
608cc4b1
CH
2353 if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) {
2354 if (nvme_nvm_register(ns, disk_name, node)) {
2355 dev_warn(ctrl->device, "LightNVM init failure\n");
2356 goto out_free_id;
2357 }
3dc87dd0 2358 }
ac81bfa9 2359
3dc87dd0
MB
2360 disk = alloc_disk_node(0, node);
2361 if (!disk)
2362 goto out_free_id;
ac81bfa9 2363
3dc87dd0
MB
2364 disk->fops = &nvme_fops;
2365 disk->private_data = ns;
2366 disk->queue = ns->queue;
2367 disk->flags = GENHD_FL_EXT_DEVT;
2368 memcpy(disk->disk_name, disk_name, DISK_NAME_LEN);
2369 ns->disk = disk;
2370
2371 __nvme_revalidate_disk(disk, id);
5bae7f73 2372
32f0c4af
KB
2373 mutex_lock(&ctrl->namespaces_mutex);
2374 list_add_tail(&ns->list, &ctrl->namespaces);
2375 mutex_unlock(&ctrl->namespaces_mutex);
2376
d22524a4 2377 nvme_get_ctrl(ctrl);
ac81bfa9
MB
2378
2379 kfree(id);
2380
0d52c756 2381 device_add_disk(ctrl->device, ns->disk);
2b9b6e86
KB
2382 if (sysfs_create_group(&disk_to_dev(ns->disk)->kobj,
2383 &nvme_ns_attr_group))
2384 pr_warn("%s: failed to create sysfs group for identification\n",
2385 ns->disk->disk_name);
3dc87dd0
MB
2386 if (ns->ndev && nvme_nvm_register_sysfs(ns))
2387 pr_warn("%s: failed to register lightnvm sysfs group for identification\n",
2388 ns->disk->disk_name);
5bae7f73 2389 return;
ac81bfa9
MB
2390 out_free_id:
2391 kfree(id);
5bae7f73
CH
2392 out_free_queue:
2393 blk_cleanup_queue(ns->queue);
075790eb
KB
2394 out_release_instance:
2395 ida_simple_remove(&ctrl->ns_ida, ns->instance);
5bae7f73
CH
2396 out_free_ns:
2397 kfree(ns);
2398}
2399
2400static void nvme_ns_remove(struct nvme_ns *ns)
2401{
646017a6
KB
2402 if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags))
2403 return;
69d3b8ac 2404
b0b4e09c 2405 if (ns->disk && ns->disk->flags & GENHD_FL_UP) {
5bae7f73
CH
2406 if (blk_get_integrity(ns->disk))
2407 blk_integrity_unregister(ns->disk);
2b9b6e86
KB
2408 sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
2409 &nvme_ns_attr_group);
3dc87dd0
MB
2410 if (ns->ndev)
2411 nvme_nvm_unregister_sysfs(ns);
5bae7f73 2412 del_gendisk(ns->disk);
5bae7f73
CH
2413 blk_cleanup_queue(ns->queue);
2414 }
32f0c4af
KB
2415
2416 mutex_lock(&ns->ctrl->namespaces_mutex);
5bae7f73 2417 list_del_init(&ns->list);
32f0c4af
KB
2418 mutex_unlock(&ns->ctrl->namespaces_mutex);
2419
5bae7f73
CH
2420 nvme_put_ns(ns);
2421}
2422
540c801c
KB
2423static void nvme_validate_ns(struct nvme_ctrl *ctrl, unsigned nsid)
2424{
2425 struct nvme_ns *ns;
2426
32f0c4af 2427 ns = nvme_find_get_ns(ctrl, nsid);
540c801c 2428 if (ns) {
b0b4e09c 2429 if (ns->disk && revalidate_disk(ns->disk))
540c801c 2430 nvme_ns_remove(ns);
32f0c4af 2431 nvme_put_ns(ns);
540c801c
KB
2432 } else
2433 nvme_alloc_ns(ctrl, nsid);
2434}
2435
47b0e50a
SB
2436static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
2437 unsigned nsid)
2438{
2439 struct nvme_ns *ns, *next;
2440
2441 list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) {
2442 if (ns->ns_id > nsid)
2443 nvme_ns_remove(ns);
2444 }
2445}
2446
540c801c
KB
2447static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn)
2448{
2449 struct nvme_ns *ns;
2450 __le32 *ns_list;
2451 unsigned i, j, nsid, prev = 0, num_lists = DIV_ROUND_UP(nn, 1024);
2452 int ret = 0;
2453
2454 ns_list = kzalloc(0x1000, GFP_KERNEL);
2455 if (!ns_list)
2456 return -ENOMEM;
2457
2458 for (i = 0; i < num_lists; i++) {
2459 ret = nvme_identify_ns_list(ctrl, prev, ns_list);
2460 if (ret)
47b0e50a 2461 goto free;
540c801c
KB
2462
2463 for (j = 0; j < min(nn, 1024U); j++) {
2464 nsid = le32_to_cpu(ns_list[j]);
2465 if (!nsid)
2466 goto out;
2467
2468 nvme_validate_ns(ctrl, nsid);
2469
2470 while (++prev < nsid) {
32f0c4af
KB
2471 ns = nvme_find_get_ns(ctrl, prev);
2472 if (ns) {
540c801c 2473 nvme_ns_remove(ns);
32f0c4af
KB
2474 nvme_put_ns(ns);
2475 }
540c801c
KB
2476 }
2477 }
2478 nn -= j;
2479 }
2480 out:
47b0e50a
SB
2481 nvme_remove_invalid_namespaces(ctrl, prev);
2482 free:
540c801c
KB
2483 kfree(ns_list);
2484 return ret;
2485}
2486
/*
 * Scan namespaces by validating every id from 1 to @nn in turn, then
 * remove namespaces whose id lies above @nn.
 */
static void nvme_scan_ns_sequential(struct nvme_ctrl *ctrl, unsigned nn)
{
	unsigned nsid = 1;

	while (nsid <= nn) {
		nvme_validate_ns(ctrl, nsid);
		nsid++;
	}

	nvme_remove_invalid_namespaces(ctrl, nn);
}
2496
5955be21 2497static void nvme_scan_work(struct work_struct *work)
5bae7f73 2498{
5955be21
CH
2499 struct nvme_ctrl *ctrl =
2500 container_of(work, struct nvme_ctrl, scan_work);
5bae7f73 2501 struct nvme_id_ctrl *id;
540c801c 2502 unsigned nn;
5bae7f73 2503
5955be21
CH
2504 if (ctrl->state != NVME_CTRL_LIVE)
2505 return;
2506
5bae7f73
CH
2507 if (nvme_identify_ctrl(ctrl, &id))
2508 return;
540c801c
KB
2509
2510 nn = le32_to_cpu(id->nn);
8ef2074d 2511 if (ctrl->vs >= NVME_VS(1, 1, 0) &&
540c801c
KB
2512 !(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)) {
2513 if (!nvme_scan_ns_list(ctrl, nn))
2514 goto done;
2515 }
5955be21 2516 nvme_scan_ns_sequential(ctrl, nn);
540c801c 2517 done:
32f0c4af 2518 mutex_lock(&ctrl->namespaces_mutex);
540c801c 2519 list_sort(NULL, &ctrl->namespaces, ns_cmp);
69d3b8ac 2520 mutex_unlock(&ctrl->namespaces_mutex);
5bae7f73
CH
2521 kfree(id);
2522}
5955be21
CH
2523
2524void nvme_queue_scan(struct nvme_ctrl *ctrl)
2525{
2526 /*
2527 * Do not queue new scan work when a controller is reset during
2528 * removal.
2529 */
2530 if (ctrl->state == NVME_CTRL_LIVE)
c669ccdc 2531 queue_work(nvme_wq, &ctrl->scan_work);
5955be21
CH
2532}
2533EXPORT_SYMBOL_GPL(nvme_queue_scan);
5bae7f73 2534
32f0c4af
KB
2535/*
2536 * This function iterates the namespace list unlocked to allow recovery from
2537 * controller failure. It is up to the caller to ensure the namespace list is
2538 * not modified by scan work while this function is executing.
2539 */
5bae7f73
CH
2540void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
2541{
2542 struct nvme_ns *ns, *next;
2543
0ff9d4e1
KB
2544 /*
2545 * The dead states indicates the controller was not gracefully
2546 * disconnected. In that case, we won't be able to flush any data while
2547 * removing the namespaces' disks; fail all the queues now to avoid
2548 * potentially having to clean up the failed sync later.
2549 */
2550 if (ctrl->state == NVME_CTRL_DEAD)
2551 nvme_kill_queues(ctrl);
2552
5bae7f73
CH
2553 list_for_each_entry_safe(ns, next, &ctrl->namespaces, list)
2554 nvme_ns_remove(ns);
2555}
576d55d6 2556EXPORT_SYMBOL_GPL(nvme_remove_namespaces);
5bae7f73 2557
f866fc42
CH
2558static void nvme_async_event_work(struct work_struct *work)
2559{
2560 struct nvme_ctrl *ctrl =
2561 container_of(work, struct nvme_ctrl, async_event_work);
2562
2563 spin_lock_irq(&ctrl->lock);
cd48282c 2564 while (ctrl->state == NVME_CTRL_LIVE && ctrl->event_limit > 0) {
f866fc42
CH
2565 int aer_idx = --ctrl->event_limit;
2566
2567 spin_unlock_irq(&ctrl->lock);
2568 ctrl->ops->submit_async_event(ctrl, aer_idx);
2569 spin_lock_irq(&ctrl->lock);
2570 }
2571 spin_unlock_irq(&ctrl->lock);
2572}
2573
b6dccf7f
AD
2574static bool nvme_ctrl_pp_status(struct nvme_ctrl *ctrl)
2575{
2576
2577 u32 csts;
2578
2579 if (ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts))
2580 return false;
2581
2582 if (csts == ~0)
2583 return false;
2584
2585 return ((ctrl->ctrl_config & NVME_CC_ENABLE) && (csts & NVME_CSTS_PP));
2586}
2587
2588static void nvme_get_fw_slot_info(struct nvme_ctrl *ctrl)
2589{
2590 struct nvme_command c = { };
2591 struct nvme_fw_slot_info_log *log;
2592
2593 log = kmalloc(sizeof(*log), GFP_KERNEL);
2594 if (!log)
2595 return;
2596
2597 c.common.opcode = nvme_admin_get_log_page;
62346eae 2598 c.common.nsid = cpu_to_le32(NVME_NSID_ALL);
b6dccf7f
AD
2599 c.common.cdw10[0] = nvme_get_log_dw10(NVME_LOG_FW_SLOT, sizeof(*log));
2600
2601 if (!nvme_submit_sync_cmd(ctrl->admin_q, &c, log, sizeof(*log)))
2602 dev_warn(ctrl->device,
2603 "Get FW SLOT INFO log error\n");
2604 kfree(log);
2605}
2606
2607static void nvme_fw_act_work(struct work_struct *work)
2608{
2609 struct nvme_ctrl *ctrl = container_of(work,
2610 struct nvme_ctrl, fw_act_work);
2611 unsigned long fw_act_timeout;
2612
2613 if (ctrl->mtfa)
2614 fw_act_timeout = jiffies +
2615 msecs_to_jiffies(ctrl->mtfa * 100);
2616 else
2617 fw_act_timeout = jiffies +
2618 msecs_to_jiffies(admin_timeout * 1000);
2619
2620 nvme_stop_queues(ctrl);
2621 while (nvme_ctrl_pp_status(ctrl)) {
2622 if (time_after(jiffies, fw_act_timeout)) {
2623 dev_warn(ctrl->device,
2624 "Fw activation timeout, reset controller\n");
2625 nvme_reset_ctrl(ctrl);
2626 break;
2627 }
2628 msleep(100);
2629 }
2630
2631 if (ctrl->state != NVME_CTRL_LIVE)
2632 return;
2633
2634 nvme_start_queues(ctrl);
2635 /* read FW slot informationi to clear the AER*/
2636 nvme_get_fw_slot_info(ctrl);
2637}
2638
7bf58533
CH
2639void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
2640 union nvme_result *res)
f866fc42 2641{
7bf58533
CH
2642 u32 result = le32_to_cpu(res->u32);
2643 bool done = true;
f866fc42 2644
7bf58533
CH
2645 switch (le16_to_cpu(status) >> 1) {
2646 case NVME_SC_SUCCESS:
2647 done = false;
2648 /*FALLTHRU*/
2649 case NVME_SC_ABORT_REQ:
f866fc42 2650 ++ctrl->event_limit;
cd48282c 2651 if (ctrl->state == NVME_CTRL_LIVE)
1a40d972 2652 queue_work(nvme_wq, &ctrl->async_event_work);
7bf58533
CH
2653 break;
2654 default:
2655 break;
f866fc42
CH
2656 }
2657
7bf58533 2658 if (done)
f866fc42
CH
2659 return;
2660
2661 switch (result & 0xff07) {
2662 case NVME_AER_NOTICE_NS_CHANGED:
2663 dev_info(ctrl->device, "rescanning\n");
2664 nvme_queue_scan(ctrl);
2665 break;
b6dccf7f 2666 case NVME_AER_NOTICE_FW_ACT_STARTING:
1a40d972 2667 queue_work(nvme_wq, &ctrl->fw_act_work);
b6dccf7f 2668 break;
f866fc42
CH
2669 default:
2670 dev_warn(ctrl->device, "async event result %08x\n", result);
2671 }
2672}
2673EXPORT_SYMBOL_GPL(nvme_complete_async_event);
2674
2675void nvme_queue_async_events(struct nvme_ctrl *ctrl)
2676{
2677 ctrl->event_limit = NVME_NR_AERS;
c669ccdc 2678 queue_work(nvme_wq, &ctrl->async_event_work);
f866fc42
CH
2679}
2680EXPORT_SYMBOL_GPL(nvme_queue_async_events);
2681
d09f2b45 2682void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
576d55d6 2683{
d09f2b45 2684 nvme_stop_keep_alive(ctrl);
f866fc42 2685 flush_work(&ctrl->async_event_work);
5955be21 2686 flush_work(&ctrl->scan_work);
b6dccf7f 2687 cancel_work_sync(&ctrl->fw_act_work);
d09f2b45
SG
2688}
2689EXPORT_SYMBOL_GPL(nvme_stop_ctrl);
2690
2691void nvme_start_ctrl(struct nvme_ctrl *ctrl)
2692{
2693 if (ctrl->kato)
2694 nvme_start_keep_alive(ctrl);
2695
2696 if (ctrl->queue_count > 1) {
2697 nvme_queue_scan(ctrl);
2698 nvme_queue_async_events(ctrl);
2699 nvme_start_queues(ctrl);
2700 }
2701}
2702EXPORT_SYMBOL_GPL(nvme_start_ctrl);
5955be21 2703
d09f2b45
SG
2704void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
2705{
d22524a4 2706 device_del(ctrl->device);
f3ca80fc
CH
2707
2708 spin_lock(&dev_list_lock);
2709 list_del(&ctrl->node);
2710 spin_unlock(&dev_list_lock);
53029b04 2711}
576d55d6 2712EXPORT_SYMBOL_GPL(nvme_uninit_ctrl);
53029b04 2713
d22524a4 2714static void nvme_free_ctrl(struct device *dev)
53029b04 2715{
d22524a4
CH
2716 struct nvme_ctrl *ctrl =
2717 container_of(dev, struct nvme_ctrl, ctrl_device);
f3ca80fc 2718
9843f685 2719 ida_simple_remove(&nvme_instance_ida, ctrl->instance);
075790eb 2720 ida_destroy(&ctrl->ns_ida);
f3ca80fc
CH
2721
2722 ctrl->ops->free_ctrl(ctrl);
2723}
2724
f3ca80fc
CH
2725/*
2726 * Initialize a NVMe controller structures. This needs to be called during
2727 * earliest initialization so that we have the initialized structured around
2728 * during probing.
2729 */
2730int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
2731 const struct nvme_ctrl_ops *ops, unsigned long quirks)
2732{
2733 int ret;
2734
bb8d261e
CH
2735 ctrl->state = NVME_CTRL_NEW;
2736 spin_lock_init(&ctrl->lock);
f3ca80fc 2737 INIT_LIST_HEAD(&ctrl->namespaces);
69d3b8ac 2738 mutex_init(&ctrl->namespaces_mutex);
f3ca80fc
CH
2739 ctrl->dev = dev;
2740 ctrl->ops = ops;
2741 ctrl->quirks = quirks;
5955be21 2742 INIT_WORK(&ctrl->scan_work, nvme_scan_work);
f866fc42 2743 INIT_WORK(&ctrl->async_event_work, nvme_async_event_work);
b6dccf7f 2744 INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work);
f3ca80fc 2745
9843f685
CH
2746 ret = ida_simple_get(&nvme_instance_ida, 0, 0, GFP_KERNEL);
2747 if (ret < 0)
f3ca80fc 2748 goto out;
9843f685 2749 ctrl->instance = ret;
f3ca80fc 2750
d22524a4
CH
2751 device_initialize(&ctrl->ctrl_device);
2752 ctrl->device = &ctrl->ctrl_device;
2753 ctrl->device->devt = MKDEV(nvme_char_major, ctrl->instance);
2754 ctrl->device->class = nvme_class;
2755 ctrl->device->parent = ctrl->dev;
2756 ctrl->device->groups = nvme_dev_attr_groups;
2757 ctrl->device->release = nvme_free_ctrl;
2758 dev_set_drvdata(ctrl->device, ctrl);
2759 ret = dev_set_name(ctrl->device, "nvme%d", ctrl->instance);
2760 if (ret)
f3ca80fc 2761 goto out_release_instance;
d22524a4
CH
2762 ret = device_add(ctrl->device);
2763 if (ret)
2764 goto out_free_name;
2765
075790eb 2766 ida_init(&ctrl->ns_ida);
f3ca80fc 2767
f3ca80fc
CH
2768 spin_lock(&dev_list_lock);
2769 list_add_tail(&ctrl->node, &nvme_ctrl_list);
2770 spin_unlock(&dev_list_lock);
2771
c5552fde
AL
2772 /*
2773 * Initialize latency tolerance controls. The sysfs files won't
2774 * be visible to userspace unless the device actually supports APST.
2775 */
2776 ctrl->device->power.set_latency_tolerance = nvme_set_latency_tolerance;
2777 dev_pm_qos_update_user_latency_tolerance(ctrl->device,
2778 min(default_ps_max_latency_us, (unsigned long)S32_MAX));
2779
f3ca80fc 2780 return 0;
d22524a4
CH
2781out_free_name:
2782 kfree_const(dev->kobj.name);
f3ca80fc 2783out_release_instance:
9843f685 2784 ida_simple_remove(&nvme_instance_ida, ctrl->instance);
f3ca80fc
CH
2785out:
2786 return ret;
2787}
576d55d6 2788EXPORT_SYMBOL_GPL(nvme_init_ctrl);
f3ca80fc 2789
69d9a99c
KB
2790/**
2791 * nvme_kill_queues(): Ends all namespace queues
2792 * @ctrl: the dead controller that needs to end
2793 *
2794 * Call this function when the driver determines it is unable to get the
2795 * controller in a state capable of servicing IO.
2796 */
2797void nvme_kill_queues(struct nvme_ctrl *ctrl)
2798{
2799 struct nvme_ns *ns;
2800
32f0c4af 2801 mutex_lock(&ctrl->namespaces_mutex);
82654b6b 2802
443bd90f 2803 /* Forcibly unquiesce queues to avoid blocking dispatch */
7dd1ab16
SB
2804 if (ctrl->admin_q)
2805 blk_mq_unquiesce_queue(ctrl->admin_q);
443bd90f 2806
32f0c4af 2807 list_for_each_entry(ns, &ctrl->namespaces, list) {
69d9a99c
KB
2808 /*
2809 * Revalidating a dead namespace sets capacity to 0. This will
2810 * end buffered writers dirtying pages that can't be synced.
2811 */
f33447b9
KB
2812 if (!ns->disk || test_and_set_bit(NVME_NS_DEAD, &ns->flags))
2813 continue;
2814 revalidate_disk(ns->disk);
69d9a99c 2815 blk_set_queue_dying(ns->queue);
806f026f 2816
443bd90f
ML
2817 /* Forcibly unquiesce queues to avoid blocking dispatch */
2818 blk_mq_unquiesce_queue(ns->queue);
69d9a99c 2819 }
32f0c4af 2820 mutex_unlock(&ctrl->namespaces_mutex);
69d9a99c 2821}
237045fc 2822EXPORT_SYMBOL_GPL(nvme_kill_queues);
69d9a99c 2823
302ad8cc
KB
2824void nvme_unfreeze(struct nvme_ctrl *ctrl)
2825{
2826 struct nvme_ns *ns;
2827
2828 mutex_lock(&ctrl->namespaces_mutex);
2829 list_for_each_entry(ns, &ctrl->namespaces, list)
2830 blk_mq_unfreeze_queue(ns->queue);
2831 mutex_unlock(&ctrl->namespaces_mutex);
2832}
2833EXPORT_SYMBOL_GPL(nvme_unfreeze);
2834
2835void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout)
2836{
2837 struct nvme_ns *ns;
2838
2839 mutex_lock(&ctrl->namespaces_mutex);
2840 list_for_each_entry(ns, &ctrl->namespaces, list) {
2841 timeout = blk_mq_freeze_queue_wait_timeout(ns->queue, timeout);
2842 if (timeout <= 0)
2843 break;
2844 }
2845 mutex_unlock(&ctrl->namespaces_mutex);
2846}
2847EXPORT_SYMBOL_GPL(nvme_wait_freeze_timeout);
2848
2849void nvme_wait_freeze(struct nvme_ctrl *ctrl)
2850{
2851 struct nvme_ns *ns;
2852
2853 mutex_lock(&ctrl->namespaces_mutex);
2854 list_for_each_entry(ns, &ctrl->namespaces, list)
2855 blk_mq_freeze_queue_wait(ns->queue);
2856 mutex_unlock(&ctrl->namespaces_mutex);
2857}
2858EXPORT_SYMBOL_GPL(nvme_wait_freeze);
2859
2860void nvme_start_freeze(struct nvme_ctrl *ctrl)
2861{
2862 struct nvme_ns *ns;
2863
2864 mutex_lock(&ctrl->namespaces_mutex);
2865 list_for_each_entry(ns, &ctrl->namespaces, list)
1671d522 2866 blk_freeze_queue_start(ns->queue);
302ad8cc
KB
2867 mutex_unlock(&ctrl->namespaces_mutex);
2868}
2869EXPORT_SYMBOL_GPL(nvme_start_freeze);
2870
25646264 2871void nvme_stop_queues(struct nvme_ctrl *ctrl)
363c9aac
SG
2872{
2873 struct nvme_ns *ns;
2874
32f0c4af 2875 mutex_lock(&ctrl->namespaces_mutex);
a6eaa884 2876 list_for_each_entry(ns, &ctrl->namespaces, list)
3174dd33 2877 blk_mq_quiesce_queue(ns->queue);
32f0c4af 2878 mutex_unlock(&ctrl->namespaces_mutex);
363c9aac 2879}
576d55d6 2880EXPORT_SYMBOL_GPL(nvme_stop_queues);
363c9aac 2881
25646264 2882void nvme_start_queues(struct nvme_ctrl *ctrl)
363c9aac
SG
2883{
2884 struct nvme_ns *ns;
2885
32f0c4af 2886 mutex_lock(&ctrl->namespaces_mutex);
8d7b8faf 2887 list_for_each_entry(ns, &ctrl->namespaces, list)
f660174e 2888 blk_mq_unquiesce_queue(ns->queue);
32f0c4af 2889 mutex_unlock(&ctrl->namespaces_mutex);
363c9aac 2890}
576d55d6 2891EXPORT_SYMBOL_GPL(nvme_start_queues);
363c9aac 2892
31b84460
SG
2893int nvme_reinit_tagset(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set)
2894{
2895 if (!ctrl->ops->reinit_request)
2896 return 0;
2897
2898 return blk_mq_tagset_iter(set, set->driver_data,
2899 ctrl->ops->reinit_request);
2900}
2901EXPORT_SYMBOL_GPL(nvme_reinit_tagset);
2902
5bae7f73
CH
2903int __init nvme_core_init(void)
2904{
2905 int result;
2906
9a6327d2
SG
2907 nvme_wq = alloc_workqueue("nvme-wq",
2908 WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
2909 if (!nvme_wq)
2910 return -ENOMEM;
2911
f3ca80fc
CH
2912 result = __register_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme",
2913 &nvme_dev_fops);
2914 if (result < 0)
9a6327d2 2915 goto destroy_wq;
f3ca80fc
CH
2916 else if (result > 0)
2917 nvme_char_major = result;
2918
2919 nvme_class = class_create(THIS_MODULE, "nvme");
2920 if (IS_ERR(nvme_class)) {
2921 result = PTR_ERR(nvme_class);
2922 goto unregister_chrdev;
2923 }
2924
5bae7f73 2925 return 0;
f3ca80fc 2926
9a6327d2 2927unregister_chrdev:
f3ca80fc 2928 __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
9a6327d2
SG
2929destroy_wq:
2930 destroy_workqueue(nvme_wq);
f3ca80fc 2931 return result;
5bae7f73
CH
2932}
2933
2934void nvme_core_exit(void)
2935{
f3ca80fc
CH
2936 class_destroy(nvme_class);
2937 __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
9a6327d2 2938 destroy_workqueue(nvme_wq);
5bae7f73 2939}
576d55d6
ML
2940
2941MODULE_LICENSE("GPL");
2942MODULE_VERSION("1.0");
2943module_init(nvme_core_init);
2944module_exit(nvme_core_exit);