]>
Commit | Line | Data |
---|---|---|
21d34711 CH |
1 | /* |
2 | * NVM Express device driver | |
3 | * Copyright (c) 2011-2014, Intel Corporation. | |
4 | * | |
5 | * This program is free software; you can redistribute it and/or modify it | |
6 | * under the terms and conditions of the GNU General Public License, | |
7 | * version 2, as published by the Free Software Foundation. | |
8 | * | |
9 | * This program is distributed in the hope it will be useful, but WITHOUT | |
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
12 | * more details. | |
13 | */ | |
14 | ||
15 | #include <linux/blkdev.h> | |
16 | #include <linux/blk-mq.h> | |
5fd4ce1b | 17 | #include <linux/delay.h> |
21d34711 | 18 | #include <linux/errno.h> |
1673f1f0 | 19 | #include <linux/hdreg.h> |
21d34711 CH |
20 | #include <linux/kernel.h> |
21 | #include <linux/slab.h> | |
22 | #include <linux/types.h> | |
1673f1f0 CH |
23 | #include <linux/pr.h> |
24 | #include <linux/ptrace.h> | |
25 | #include <linux/nvme_ioctl.h> | |
26 | #include <linux/t10-pi.h> | |
27 | #include <scsi/sg.h> | |
28 | #include <asm/unaligned.h> | |
21d34711 CH |
29 | |
30 | #include "nvme.h" | |
31 | ||
1673f1f0 CH |
32 | DEFINE_SPINLOCK(dev_list_lock); |
33 | ||
34 | static void nvme_free_ns(struct kref *kref) | |
35 | { | |
36 | struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref); | |
37 | ||
38 | if (ns->type == NVME_NS_LIGHTNVM) | |
39 | nvme_nvm_unregister(ns->queue, ns->disk->disk_name); | |
40 | ||
41 | spin_lock(&dev_list_lock); | |
42 | ns->disk->private_data = NULL; | |
43 | spin_unlock(&dev_list_lock); | |
44 | ||
45 | nvme_put_ctrl(ns->ctrl); | |
46 | put_disk(ns->disk); | |
47 | kfree(ns); | |
48 | } | |
49 | ||
50 | void nvme_put_ns(struct nvme_ns *ns) | |
51 | { | |
52 | kref_put(&ns->kref, nvme_free_ns); | |
53 | } | |
54 | ||
55 | static struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk) | |
56 | { | |
57 | struct nvme_ns *ns; | |
58 | ||
59 | spin_lock(&dev_list_lock); | |
60 | ns = disk->private_data; | |
61 | if (ns && !kref_get_unless_zero(&ns->kref)) | |
62 | ns = NULL; | |
63 | spin_unlock(&dev_list_lock); | |
64 | ||
65 | return ns; | |
66 | } | |
67 | ||
4160982e CH |
68 | struct request *nvme_alloc_request(struct request_queue *q, |
69 | struct nvme_command *cmd, unsigned int flags) | |
21d34711 CH |
70 | { |
71 | bool write = cmd->common.opcode & 1; | |
21d34711 | 72 | struct request *req; |
21d34711 | 73 | |
4160982e | 74 | req = blk_mq_alloc_request(q, write, flags); |
21d34711 | 75 | if (IS_ERR(req)) |
4160982e | 76 | return req; |
21d34711 CH |
77 | |
78 | req->cmd_type = REQ_TYPE_DRV_PRIV; | |
79 | req->cmd_flags |= REQ_FAILFAST_DRIVER; | |
80 | req->__data_len = 0; | |
81 | req->__sector = (sector_t) -1; | |
82 | req->bio = req->biotail = NULL; | |
83 | ||
21d34711 CH |
84 | req->cmd = (unsigned char *)cmd; |
85 | req->cmd_len = sizeof(struct nvme_command); | |
86 | req->special = (void *)0; | |
87 | ||
4160982e CH |
88 | return req; |
89 | } | |
90 | ||
91 | /* | |
92 | * Returns 0 on success. If the result is negative, it's a Linux error code; | |
93 | * if the result is positive, it's an NVM Express status code | |
94 | */ | |
95 | int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, | |
96 | void *buffer, unsigned bufflen, u32 *result, unsigned timeout) | |
97 | { | |
98 | struct request *req; | |
99 | int ret; | |
100 | ||
101 | req = nvme_alloc_request(q, cmd, 0); | |
102 | if (IS_ERR(req)) | |
103 | return PTR_ERR(req); | |
104 | ||
105 | req->timeout = timeout ? timeout : ADMIN_TIMEOUT; | |
106 | ||
21d34711 CH |
107 | if (buffer && bufflen) { |
108 | ret = blk_rq_map_kern(q, req, buffer, bufflen, GFP_KERNEL); | |
109 | if (ret) | |
110 | goto out; | |
4160982e CH |
111 | } |
112 | ||
113 | blk_execute_rq(req->q, NULL, req, 0); | |
114 | if (result) | |
115 | *result = (u32)(uintptr_t)req->special; | |
116 | ret = req->errors; | |
117 | out: | |
118 | blk_mq_free_request(req); | |
119 | return ret; | |
120 | } | |
121 | ||
122 | int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, | |
123 | void *buffer, unsigned bufflen) | |
124 | { | |
125 | return __nvme_submit_sync_cmd(q, cmd, buffer, bufflen, NULL, 0); | |
126 | } | |
127 | ||
0b7f1f26 KB |
128 | int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, |
129 | void __user *ubuffer, unsigned bufflen, | |
130 | void __user *meta_buffer, unsigned meta_len, u32 meta_seed, | |
131 | u32 *result, unsigned timeout) | |
4160982e | 132 | { |
0b7f1f26 KB |
133 | bool write = cmd->common.opcode & 1; |
134 | struct nvme_ns *ns = q->queuedata; | |
135 | struct gendisk *disk = ns ? ns->disk : NULL; | |
4160982e | 136 | struct request *req; |
0b7f1f26 KB |
137 | struct bio *bio = NULL; |
138 | void *meta = NULL; | |
4160982e CH |
139 | int ret; |
140 | ||
141 | req = nvme_alloc_request(q, cmd, 0); | |
142 | if (IS_ERR(req)) | |
143 | return PTR_ERR(req); | |
144 | ||
145 | req->timeout = timeout ? timeout : ADMIN_TIMEOUT; | |
146 | ||
147 | if (ubuffer && bufflen) { | |
21d34711 CH |
148 | ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen, |
149 | GFP_KERNEL); | |
150 | if (ret) | |
151 | goto out; | |
152 | bio = req->bio; | |
21d34711 | 153 | |
0b7f1f26 KB |
154 | if (!disk) |
155 | goto submit; | |
156 | bio->bi_bdev = bdget_disk(disk, 0); | |
157 | if (!bio->bi_bdev) { | |
158 | ret = -ENODEV; | |
159 | goto out_unmap; | |
160 | } | |
161 | ||
162 | if (meta_buffer) { | |
163 | struct bio_integrity_payload *bip; | |
164 | ||
165 | meta = kmalloc(meta_len, GFP_KERNEL); | |
166 | if (!meta) { | |
167 | ret = -ENOMEM; | |
168 | goto out_unmap; | |
169 | } | |
170 | ||
171 | if (write) { | |
172 | if (copy_from_user(meta, meta_buffer, | |
173 | meta_len)) { | |
174 | ret = -EFAULT; | |
175 | goto out_free_meta; | |
176 | } | |
177 | } | |
178 | ||
179 | bip = bio_integrity_alloc(bio, GFP_KERNEL, 1); | |
180 | if (!bip) { | |
181 | ret = -ENOMEM; | |
182 | goto out_free_meta; | |
183 | } | |
184 | ||
185 | bip->bip_iter.bi_size = meta_len; | |
186 | bip->bip_iter.bi_sector = meta_seed; | |
187 | ||
188 | ret = bio_integrity_add_page(bio, virt_to_page(meta), | |
189 | meta_len, offset_in_page(meta)); | |
190 | if (ret != meta_len) { | |
191 | ret = -ENOMEM; | |
192 | goto out_free_meta; | |
193 | } | |
194 | } | |
195 | } | |
196 | submit: | |
197 | blk_execute_rq(req->q, disk, req, 0); | |
198 | ret = req->errors; | |
21d34711 CH |
199 | if (result) |
200 | *result = (u32)(uintptr_t)req->special; | |
0b7f1f26 KB |
201 | if (meta && !ret && !write) { |
202 | if (copy_to_user(meta_buffer, meta, meta_len)) | |
203 | ret = -EFAULT; | |
204 | } | |
205 | out_free_meta: | |
206 | kfree(meta); | |
207 | out_unmap: | |
208 | if (bio) { | |
209 | if (disk && bio->bi_bdev) | |
210 | bdput(bio->bi_bdev); | |
211 | blk_rq_unmap_user(bio); | |
212 | } | |
21d34711 CH |
213 | out: |
214 | blk_mq_free_request(req); | |
215 | return ret; | |
216 | } | |
217 | ||
0b7f1f26 KB |
218 | int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, |
219 | void __user *ubuffer, unsigned bufflen, u32 *result, | |
220 | unsigned timeout) | |
221 | { | |
222 | return __nvme_submit_user_cmd(q, cmd, ubuffer, bufflen, NULL, 0, 0, | |
223 | result, timeout); | |
224 | } | |
225 | ||
1c63dc66 | 226 | int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id) |
21d34711 CH |
227 | { |
228 | struct nvme_command c = { }; | |
229 | int error; | |
230 | ||
231 | /* gcc-4.4.4 (at least) has issues with initializers and anon unions */ | |
232 | c.identify.opcode = nvme_admin_identify; | |
233 | c.identify.cns = cpu_to_le32(1); | |
234 | ||
235 | *id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL); | |
236 | if (!*id) | |
237 | return -ENOMEM; | |
238 | ||
239 | error = nvme_submit_sync_cmd(dev->admin_q, &c, *id, | |
240 | sizeof(struct nvme_id_ctrl)); | |
241 | if (error) | |
242 | kfree(*id); | |
243 | return error; | |
244 | } | |
245 | ||
1c63dc66 | 246 | int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid, |
21d34711 CH |
247 | struct nvme_id_ns **id) |
248 | { | |
249 | struct nvme_command c = { }; | |
250 | int error; | |
251 | ||
252 | /* gcc-4.4.4 (at least) has issues with initializers and anon unions */ | |
253 | c.identify.opcode = nvme_admin_identify, | |
254 | c.identify.nsid = cpu_to_le32(nsid), | |
255 | ||
256 | *id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL); | |
257 | if (!*id) | |
258 | return -ENOMEM; | |
259 | ||
260 | error = nvme_submit_sync_cmd(dev->admin_q, &c, *id, | |
261 | sizeof(struct nvme_id_ns)); | |
262 | if (error) | |
263 | kfree(*id); | |
264 | return error; | |
265 | } | |
266 | ||
1c63dc66 | 267 | int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid, |
21d34711 CH |
268 | dma_addr_t dma_addr, u32 *result) |
269 | { | |
270 | struct nvme_command c; | |
271 | ||
272 | memset(&c, 0, sizeof(c)); | |
273 | c.features.opcode = nvme_admin_get_features; | |
274 | c.features.nsid = cpu_to_le32(nsid); | |
275 | c.features.prp1 = cpu_to_le64(dma_addr); | |
276 | c.features.fid = cpu_to_le32(fid); | |
277 | ||
4160982e | 278 | return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0, result, 0); |
21d34711 CH |
279 | } |
280 | ||
1c63dc66 | 281 | int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11, |
21d34711 CH |
282 | dma_addr_t dma_addr, u32 *result) |
283 | { | |
284 | struct nvme_command c; | |
285 | ||
286 | memset(&c, 0, sizeof(c)); | |
287 | c.features.opcode = nvme_admin_set_features; | |
288 | c.features.prp1 = cpu_to_le64(dma_addr); | |
289 | c.features.fid = cpu_to_le32(fid); | |
290 | c.features.dword11 = cpu_to_le32(dword11); | |
291 | ||
4160982e | 292 | return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0, result, 0); |
21d34711 CH |
293 | } |
294 | ||
1c63dc66 | 295 | int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log) |
21d34711 CH |
296 | { |
297 | struct nvme_command c = { }; | |
298 | int error; | |
299 | ||
300 | c.common.opcode = nvme_admin_get_log_page, | |
301 | c.common.nsid = cpu_to_le32(0xFFFFFFFF), | |
302 | c.common.cdw10[0] = cpu_to_le32( | |
303 | (((sizeof(struct nvme_smart_log) / 4) - 1) << 16) | | |
304 | NVME_LOG_SMART), | |
305 | ||
306 | *log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL); | |
307 | if (!*log) | |
308 | return -ENOMEM; | |
309 | ||
310 | error = nvme_submit_sync_cmd(dev->admin_q, &c, *log, | |
311 | sizeof(struct nvme_smart_log)); | |
312 | if (error) | |
313 | kfree(*log); | |
314 | return error; | |
315 | } | |
1673f1f0 CH |
316 | |
317 | static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) | |
318 | { | |
319 | struct nvme_user_io io; | |
320 | struct nvme_command c; | |
321 | unsigned length, meta_len; | |
322 | void __user *metadata; | |
323 | ||
324 | if (copy_from_user(&io, uio, sizeof(io))) | |
325 | return -EFAULT; | |
326 | ||
327 | switch (io.opcode) { | |
328 | case nvme_cmd_write: | |
329 | case nvme_cmd_read: | |
330 | case nvme_cmd_compare: | |
331 | break; | |
332 | default: | |
333 | return -EINVAL; | |
334 | } | |
335 | ||
336 | length = (io.nblocks + 1) << ns->lba_shift; | |
337 | meta_len = (io.nblocks + 1) * ns->ms; | |
338 | metadata = (void __user *)(uintptr_t)io.metadata; | |
339 | ||
340 | if (ns->ext) { | |
341 | length += meta_len; | |
342 | meta_len = 0; | |
343 | } else if (meta_len) { | |
344 | if ((io.metadata & 3) || !io.metadata) | |
345 | return -EINVAL; | |
346 | } | |
347 | ||
348 | memset(&c, 0, sizeof(c)); | |
349 | c.rw.opcode = io.opcode; | |
350 | c.rw.flags = io.flags; | |
351 | c.rw.nsid = cpu_to_le32(ns->ns_id); | |
352 | c.rw.slba = cpu_to_le64(io.slba); | |
353 | c.rw.length = cpu_to_le16(io.nblocks); | |
354 | c.rw.control = cpu_to_le16(io.control); | |
355 | c.rw.dsmgmt = cpu_to_le32(io.dsmgmt); | |
356 | c.rw.reftag = cpu_to_le32(io.reftag); | |
357 | c.rw.apptag = cpu_to_le16(io.apptag); | |
358 | c.rw.appmask = cpu_to_le16(io.appmask); | |
359 | ||
360 | return __nvme_submit_user_cmd(ns->queue, &c, | |
361 | (void __user *)(uintptr_t)io.addr, length, | |
362 | metadata, meta_len, io.slba, NULL, 0); | |
363 | } | |
364 | ||
365 | int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, | |
366 | struct nvme_passthru_cmd __user *ucmd) | |
367 | { | |
368 | struct nvme_passthru_cmd cmd; | |
369 | struct nvme_command c; | |
370 | unsigned timeout = 0; | |
371 | int status; | |
372 | ||
373 | if (!capable(CAP_SYS_ADMIN)) | |
374 | return -EACCES; | |
375 | if (copy_from_user(&cmd, ucmd, sizeof(cmd))) | |
376 | return -EFAULT; | |
377 | ||
378 | memset(&c, 0, sizeof(c)); | |
379 | c.common.opcode = cmd.opcode; | |
380 | c.common.flags = cmd.flags; | |
381 | c.common.nsid = cpu_to_le32(cmd.nsid); | |
382 | c.common.cdw2[0] = cpu_to_le32(cmd.cdw2); | |
383 | c.common.cdw2[1] = cpu_to_le32(cmd.cdw3); | |
384 | c.common.cdw10[0] = cpu_to_le32(cmd.cdw10); | |
385 | c.common.cdw10[1] = cpu_to_le32(cmd.cdw11); | |
386 | c.common.cdw10[2] = cpu_to_le32(cmd.cdw12); | |
387 | c.common.cdw10[3] = cpu_to_le32(cmd.cdw13); | |
388 | c.common.cdw10[4] = cpu_to_le32(cmd.cdw14); | |
389 | c.common.cdw10[5] = cpu_to_le32(cmd.cdw15); | |
390 | ||
391 | if (cmd.timeout_ms) | |
392 | timeout = msecs_to_jiffies(cmd.timeout_ms); | |
393 | ||
394 | status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, | |
395 | (void __user *)cmd.addr, cmd.data_len, | |
396 | &cmd.result, timeout); | |
397 | if (status >= 0) { | |
398 | if (put_user(cmd.result, &ucmd->result)) | |
399 | return -EFAULT; | |
400 | } | |
401 | ||
402 | return status; | |
403 | } | |
404 | ||
405 | static int nvme_ioctl(struct block_device *bdev, fmode_t mode, | |
406 | unsigned int cmd, unsigned long arg) | |
407 | { | |
408 | struct nvme_ns *ns = bdev->bd_disk->private_data; | |
409 | ||
410 | switch (cmd) { | |
411 | case NVME_IOCTL_ID: | |
412 | force_successful_syscall_return(); | |
413 | return ns->ns_id; | |
414 | case NVME_IOCTL_ADMIN_CMD: | |
415 | return nvme_user_cmd(ns->ctrl, NULL, (void __user *)arg); | |
416 | case NVME_IOCTL_IO_CMD: | |
417 | return nvme_user_cmd(ns->ctrl, ns, (void __user *)arg); | |
418 | case NVME_IOCTL_SUBMIT_IO: | |
419 | return nvme_submit_io(ns, (void __user *)arg); | |
420 | case SG_GET_VERSION_NUM: | |
421 | return nvme_sg_get_version_num((void __user *)arg); | |
422 | case SG_IO: | |
423 | return nvme_sg_io(ns, (void __user *)arg); | |
424 | default: | |
425 | return -ENOTTY; | |
426 | } | |
427 | } | |
428 | ||
#ifdef CONFIG_COMPAT
/*
 * Compat ioctl entry point: SG_IO is rejected with -ENOIOCTLCMD, everything
 * else is forwarded unchanged to nvme_ioctl().
 */
static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode,
			unsigned int cmd, unsigned long arg)
{
	if (cmd == SG_IO)
		return -ENOIOCTLCMD;

	return nvme_ioctl(bdev, mode, cmd, arg);
}
#else
#define nvme_compat_ioctl	NULL
#endif
442 | ||
443 | static int nvme_open(struct block_device *bdev, fmode_t mode) | |
444 | { | |
445 | return nvme_get_ns_from_disk(bdev->bd_disk) ? 0 : -ENXIO; | |
446 | } | |
447 | ||
448 | static void nvme_release(struct gendisk *disk, fmode_t mode) | |
449 | { | |
450 | nvme_put_ns(disk->private_data); | |
451 | } | |
452 | ||
453 | static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo) | |
454 | { | |
455 | /* some standard values */ | |
456 | geo->heads = 1 << 6; | |
457 | geo->sectors = 1 << 5; | |
458 | geo->cylinders = get_capacity(bdev->bd_disk) >> 11; | |
459 | return 0; | |
460 | } | |
461 | ||
#ifdef CONFIG_BLK_DEV_INTEGRITY
/*
 * Register a block-integrity profile for @ns chosen from its protection
 * information type, with the tuple size set to the namespace metadata
 * size, and limit the queue to a single integrity segment.
 */
static void nvme_init_integrity(struct nvme_ns *ns)
{
	struct blk_integrity integrity;

	/*
	 * Only profile and tuple_size are filled in below; zero the rest so
	 * blk_integrity_register() never sees uninitialized stack contents.
	 */
	memset(&integrity, 0, sizeof(integrity));

	switch (ns->pi_type) {
	case NVME_NS_DPS_PI_TYPE3:
		integrity.profile = &t10_pi_type3_crc;
		break;
	case NVME_NS_DPS_PI_TYPE1:
	case NVME_NS_DPS_PI_TYPE2:
		integrity.profile = &t10_pi_type1_crc;
		break;
	default:
		integrity.profile = NULL;
		break;
	}
	integrity.tuple_size = ns->ms;
	blk_integrity_register(ns->disk, &integrity);
	blk_queue_max_integrity_segments(ns->queue, 1);
}
#else
/* Integrity support is compiled out: nothing to register. */
static void nvme_init_integrity(struct nvme_ns *ns)
{
}
#endif /* CONFIG_BLK_DEV_INTEGRITY */
488 | ||
489 | static void nvme_config_discard(struct nvme_ns *ns) | |
490 | { | |
491 | u32 logical_block_size = queue_logical_block_size(ns->queue); | |
492 | ns->queue->limits.discard_zeroes_data = 0; | |
493 | ns->queue->limits.discard_alignment = logical_block_size; | |
494 | ns->queue->limits.discard_granularity = logical_block_size; | |
495 | blk_queue_max_discard_sectors(ns->queue, 0xffffffff); | |
496 | queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue); | |
497 | } | |
498 | ||
499 | int nvme_revalidate_disk(struct gendisk *disk) | |
500 | { | |
501 | struct nvme_ns *ns = disk->private_data; | |
502 | struct nvme_id_ns *id; | |
503 | u8 lbaf, pi_type; | |
504 | u16 old_ms; | |
505 | unsigned short bs; | |
506 | ||
507 | if (nvme_identify_ns(ns->ctrl, ns->ns_id, &id)) { | |
508 | dev_warn(ns->ctrl->dev, "%s: Identify failure nvme%dn%d\n", | |
509 | __func__, ns->ctrl->instance, ns->ns_id); | |
510 | return -ENODEV; | |
511 | } | |
512 | if (id->ncap == 0) { | |
513 | kfree(id); | |
514 | return -ENODEV; | |
515 | } | |
516 | ||
517 | if (nvme_nvm_ns_supported(ns, id) && ns->type != NVME_NS_LIGHTNVM) { | |
518 | if (nvme_nvm_register(ns->queue, disk->disk_name)) { | |
519 | dev_warn(ns->ctrl->dev, | |
520 | "%s: LightNVM init failure\n", __func__); | |
521 | kfree(id); | |
522 | return -ENODEV; | |
523 | } | |
524 | ns->type = NVME_NS_LIGHTNVM; | |
525 | } | |
526 | ||
527 | old_ms = ns->ms; | |
528 | lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK; | |
529 | ns->lba_shift = id->lbaf[lbaf].ds; | |
530 | ns->ms = le16_to_cpu(id->lbaf[lbaf].ms); | |
531 | ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT); | |
532 | ||
533 | /* | |
534 | * If identify namespace failed, use default 512 byte block size so | |
535 | * block layer can use before failing read/write for 0 capacity. | |
536 | */ | |
537 | if (ns->lba_shift == 0) | |
538 | ns->lba_shift = 9; | |
539 | bs = 1 << ns->lba_shift; | |
540 | ||
541 | /* XXX: PI implementation requires metadata equal t10 pi tuple size */ | |
542 | pi_type = ns->ms == sizeof(struct t10_pi_tuple) ? | |
543 | id->dps & NVME_NS_DPS_PI_MASK : 0; | |
544 | ||
545 | blk_mq_freeze_queue(disk->queue); | |
546 | if (blk_get_integrity(disk) && (ns->pi_type != pi_type || | |
547 | ns->ms != old_ms || | |
548 | bs != queue_logical_block_size(disk->queue) || | |
549 | (ns->ms && ns->ext))) | |
550 | blk_integrity_unregister(disk); | |
551 | ||
552 | ns->pi_type = pi_type; | |
553 | blk_queue_logical_block_size(ns->queue, bs); | |
554 | ||
555 | if (ns->ms && !ns->ext) | |
556 | nvme_init_integrity(ns); | |
557 | ||
558 | if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk)) | |
559 | set_capacity(disk, 0); | |
560 | else | |
561 | set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9)); | |
562 | ||
563 | if (ns->ctrl->oncs & NVME_CTRL_ONCS_DSM) | |
564 | nvme_config_discard(ns); | |
565 | blk_mq_unfreeze_queue(disk->queue); | |
566 | ||
567 | kfree(id); | |
568 | return 0; | |
569 | } | |
570 | ||
571 | static char nvme_pr_type(enum pr_type type) | |
572 | { | |
573 | switch (type) { | |
574 | case PR_WRITE_EXCLUSIVE: | |
575 | return 1; | |
576 | case PR_EXCLUSIVE_ACCESS: | |
577 | return 2; | |
578 | case PR_WRITE_EXCLUSIVE_REG_ONLY: | |
579 | return 3; | |
580 | case PR_EXCLUSIVE_ACCESS_REG_ONLY: | |
581 | return 4; | |
582 | case PR_WRITE_EXCLUSIVE_ALL_REGS: | |
583 | return 5; | |
584 | case PR_EXCLUSIVE_ACCESS_ALL_REGS: | |
585 | return 6; | |
586 | default: | |
587 | return 0; | |
588 | } | |
589 | }; | |
590 | ||
591 | static int nvme_pr_command(struct block_device *bdev, u32 cdw10, | |
592 | u64 key, u64 sa_key, u8 op) | |
593 | { | |
594 | struct nvme_ns *ns = bdev->bd_disk->private_data; | |
595 | struct nvme_command c; | |
596 | u8 data[16] = { 0, }; | |
597 | ||
598 | put_unaligned_le64(key, &data[0]); | |
599 | put_unaligned_le64(sa_key, &data[8]); | |
600 | ||
601 | memset(&c, 0, sizeof(c)); | |
602 | c.common.opcode = op; | |
603 | c.common.nsid = cpu_to_le32(ns->ns_id); | |
604 | c.common.cdw10[0] = cpu_to_le32(cdw10); | |
605 | ||
606 | return nvme_submit_sync_cmd(ns->queue, &c, data, 16); | |
607 | } | |
608 | ||
609 | static int nvme_pr_register(struct block_device *bdev, u64 old, | |
610 | u64 new, unsigned flags) | |
611 | { | |
612 | u32 cdw10; | |
613 | ||
614 | if (flags & ~PR_FL_IGNORE_KEY) | |
615 | return -EOPNOTSUPP; | |
616 | ||
617 | cdw10 = old ? 2 : 0; | |
618 | cdw10 |= (flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0; | |
619 | cdw10 |= (1 << 30) | (1 << 31); /* PTPL=1 */ | |
620 | return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_register); | |
621 | } | |
622 | ||
623 | static int nvme_pr_reserve(struct block_device *bdev, u64 key, | |
624 | enum pr_type type, unsigned flags) | |
625 | { | |
626 | u32 cdw10; | |
627 | ||
628 | if (flags & ~PR_FL_IGNORE_KEY) | |
629 | return -EOPNOTSUPP; | |
630 | ||
631 | cdw10 = nvme_pr_type(type) << 8; | |
632 | cdw10 |= ((flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0); | |
633 | return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_acquire); | |
634 | } | |
635 | ||
636 | static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new, | |
637 | enum pr_type type, bool abort) | |
638 | { | |
639 | u32 cdw10 = nvme_pr_type(type) << 8 | abort ? 2 : 1; | |
640 | return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire); | |
641 | } | |
642 | ||
643 | static int nvme_pr_clear(struct block_device *bdev, u64 key) | |
644 | { | |
645 | u32 cdw10 = 1 | key ? 1 << 3 : 0; | |
646 | return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_register); | |
647 | } | |
648 | ||
649 | static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type) | |
650 | { | |
651 | u32 cdw10 = nvme_pr_type(type) << 8 | key ? 1 << 3 : 0; | |
652 | return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release); | |
653 | } | |
654 | ||
655 | static const struct pr_ops nvme_pr_ops = { | |
656 | .pr_register = nvme_pr_register, | |
657 | .pr_reserve = nvme_pr_reserve, | |
658 | .pr_release = nvme_pr_release, | |
659 | .pr_preempt = nvme_pr_preempt, | |
660 | .pr_clear = nvme_pr_clear, | |
661 | }; | |
662 | ||
663 | const struct block_device_operations nvme_fops = { | |
664 | .owner = THIS_MODULE, | |
665 | .ioctl = nvme_ioctl, | |
666 | .compat_ioctl = nvme_compat_ioctl, | |
667 | .open = nvme_open, | |
668 | .release = nvme_release, | |
669 | .getgeo = nvme_getgeo, | |
670 | .revalidate_disk= nvme_revalidate_disk, | |
671 | .pr_ops = &nvme_pr_ops, | |
672 | }; | |
673 | ||
5fd4ce1b CH |
674 | static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled) |
675 | { | |
676 | unsigned long timeout = | |
677 | ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies; | |
678 | u32 csts, bit = enabled ? NVME_CSTS_RDY : 0; | |
679 | int ret; | |
680 | ||
681 | while ((ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts)) == 0) { | |
682 | if ((csts & NVME_CSTS_RDY) == bit) | |
683 | break; | |
684 | ||
685 | msleep(100); | |
686 | if (fatal_signal_pending(current)) | |
687 | return -EINTR; | |
688 | if (time_after(jiffies, timeout)) { | |
689 | dev_err(ctrl->dev, | |
690 | "Device not ready; aborting %s\n", enabled ? | |
691 | "initialisation" : "reset"); | |
692 | return -ENODEV; | |
693 | } | |
694 | } | |
695 | ||
696 | return ret; | |
697 | } | |
698 | ||
699 | /* | |
700 | * If the device has been passed off to us in an enabled state, just clear | |
701 | * the enabled bit. The spec says we should set the 'shutdown notification | |
702 | * bits', but doing so may cause the device to complete commands to the | |
703 | * admin queue ... and we don't know what memory that might be pointing at! | |
704 | */ | |
705 | int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap) | |
706 | { | |
707 | int ret; | |
708 | ||
709 | ctrl->ctrl_config &= ~NVME_CC_SHN_MASK; | |
710 | ctrl->ctrl_config &= ~NVME_CC_ENABLE; | |
711 | ||
712 | ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config); | |
713 | if (ret) | |
714 | return ret; | |
715 | return nvme_wait_ready(ctrl, cap, false); | |
716 | } | |
717 | ||
718 | int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap) | |
719 | { | |
720 | /* | |
721 | * Default to a 4K page size, with the intention to update this | |
722 | * path in the future to accomodate architectures with differing | |
723 | * kernel and IO page sizes. | |
724 | */ | |
725 | unsigned dev_page_min = NVME_CAP_MPSMIN(cap) + 12, page_shift = 12; | |
726 | int ret; | |
727 | ||
728 | if (page_shift < dev_page_min) { | |
729 | dev_err(ctrl->dev, | |
730 | "Minimum device page size %u too large for host (%u)\n", | |
731 | 1 << dev_page_min, 1 << page_shift); | |
732 | return -ENODEV; | |
733 | } | |
734 | ||
735 | ctrl->page_size = 1 << page_shift; | |
736 | ||
737 | ctrl->ctrl_config = NVME_CC_CSS_NVM; | |
738 | ctrl->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT; | |
739 | ctrl->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE; | |
740 | ctrl->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES; | |
741 | ctrl->ctrl_config |= NVME_CC_ENABLE; | |
742 | ||
743 | ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config); | |
744 | if (ret) | |
745 | return ret; | |
746 | return nvme_wait_ready(ctrl, cap, true); | |
747 | } | |
748 | ||
749 | int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl) | |
750 | { | |
751 | unsigned long timeout = SHUTDOWN_TIMEOUT + jiffies; | |
752 | u32 csts; | |
753 | int ret; | |
754 | ||
755 | ctrl->ctrl_config &= ~NVME_CC_SHN_MASK; | |
756 | ctrl->ctrl_config |= NVME_CC_SHN_NORMAL; | |
757 | ||
758 | ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config); | |
759 | if (ret) | |
760 | return ret; | |
761 | ||
762 | while ((ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts)) == 0) { | |
763 | if ((csts & NVME_CSTS_SHST_MASK) == NVME_CSTS_SHST_CMPLT) | |
764 | break; | |
765 | ||
766 | msleep(100); | |
767 | if (fatal_signal_pending(current)) | |
768 | return -EINTR; | |
769 | if (time_after(jiffies, timeout)) { | |
770 | dev_err(ctrl->dev, | |
771 | "Device shutdown incomplete; abort shutdown\n"); | |
772 | return -ENODEV; | |
773 | } | |
774 | } | |
775 | ||
776 | return ret; | |
777 | } | |
778 | ||
1673f1f0 CH |
779 | static void nvme_free_ctrl(struct kref *kref) |
780 | { | |
781 | struct nvme_ctrl *ctrl = container_of(kref, struct nvme_ctrl, kref); | |
782 | ||
783 | ctrl->ops->free_ctrl(ctrl); | |
784 | } | |
785 | ||
786 | void nvme_put_ctrl(struct nvme_ctrl *ctrl) | |
787 | { | |
788 | kref_put(&ctrl->kref, nvme_free_ctrl); | |
789 | } | |
790 |