]>
Commit | Line | Data |
---|---|---|
7f0f1ace KJ |
1 | /* |
2 | * QEMU NVM Express Virtual Namespace | |
3 | * | |
4 | * Copyright (c) 2019 CNEX Labs | |
5 | * Copyright (c) 2020 Samsung Electronics | |
6 | * | |
7 | * Authors: | |
8 | * Klaus Jensen <k.jensen@samsung.com> | |
9 | * | |
10 | * This work is licensed under the terms of the GNU GPL, version 2. See the | |
11 | * COPYING file in the top-level directory. | |
12 | * | |
13 | */ | |
14 | ||
15 | #include "qemu/osdep.h" | |
16 | #include "qemu/units.h" | |
73064edf | 17 | #include "qemu/cutils.h" |
1b5804a8 | 18 | #include "qemu/error-report.h" |
7ef37c1c | 19 | #include "qapi/error.h" |
73064edf | 20 | #include "qemu/bitops.h" |
7f0f1ace KJ |
21 | #include "sysemu/sysemu.h" |
22 | #include "sysemu/block-backend.h" | |
7f0f1ace KJ |
23 | |
24 | #include "nvme.h" | |
7ef37c1c | 25 | #include "trace.h" |
7f0f1ace | 26 | |
2605257a | 27 | #define MIN_DISCARD_GRANULARITY (4 * KiB) |
de482d1f | 28 | #define NVME_DEFAULT_ZONE_SIZE (128 * MiB) |
2605257a | 29 | |
dc04d25e | 30 | void nvme_ns_init_format(NvmeNamespace *ns) |
7f0f1ace | 31 | { |
516990f4 | 32 | NvmeIdNs *id_ns = &ns->id_ns; |
2605257a | 33 | BlockDriverInfo bdi; |
1e64facc DT |
34 | int npdg, ret; |
35 | int64_t nlbas; | |
516990f4 | 36 | |
6146f3dd KJ |
37 | ns->lbaf = id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(id_ns->flbas)]; |
38 | ns->lbasz = 1 << ns->lbaf.ds; | |
39 | ||
40 | nlbas = ns->size / (ns->lbasz + ns->lbaf.ms); | |
516990f4 KJ |
41 | |
42 | id_ns->nsze = cpu_to_le64(nlbas); | |
43 | ||
44 | /* no thin provisioning */ | |
45 | id_ns->ncap = id_ns->nsze; | |
46 | id_ns->nuse = id_ns->ncap; | |
47 | ||
1e64facc | 48 | ns->moff = nlbas << ns->lbaf.ds; |
516990f4 | 49 | |
6146f3dd | 50 | npdg = ns->blkconf.discard_granularity / ns->lbasz; |
516990f4 KJ |
51 | |
52 | ret = bdrv_get_info(blk_bs(ns->blkconf.blk), &bdi); | |
53 | if (ret >= 0 && bdi.cluster_size > ns->blkconf.discard_granularity) { | |
6146f3dd | 54 | npdg = bdi.cluster_size / ns->lbasz; |
516990f4 KJ |
55 | } |
56 | ||
57 | id_ns->npda = id_ns->npdg = npdg - 1; | |
58 | } | |
59 | ||
60 | static int nvme_ns_init(NvmeNamespace *ns, Error **errp) | |
61 | { | |
3276dde4 | 62 | static uint64_t ns_count; |
7f0f1ace | 63 | NvmeIdNs *id_ns = &ns->id_ns; |
44219b60 | 64 | NvmeIdNsNvm *id_ns_nvm = &ns->id_ns_nvm; |
6a674bc2 MI |
65 | uint8_t ds; |
66 | uint16_t ms; | |
67 | int i; | |
7f0f1ace | 68 | |
516990f4 | 69 | ns->csi = NVME_CSI_NVM; |
dc04d25e | 70 | ns->status = 0x0; |
516990f4 | 71 | |
146f720c | 72 | ns->id_ns.dlfeat = 0x1; |
7f0f1ace | 73 | |
516990f4 KJ |
74 | /* support DULBE and I/O optimization fields */ |
75 | id_ns->nsfeat |= (0x4 | 0x10); | |
76 | ||
e5489356 | 77 | if (ns->params.shared) { |
516990f4 KJ |
78 | id_ns->nmic |= NVME_NMIC_NS_SHARED; |
79 | } | |
80 | ||
3276dde4 HS |
81 | /* Substitute a missing EUI-64 by an autogenerated one */ |
82 | ++ns_count; | |
83 | if (!ns->params.eui64 && ns->params.eui64_default) { | |
84 | ns->params.eui64 = ns_count + NVME_EUI64_DEFAULT; | |
85 | } | |
86 | ||
516990f4 KJ |
87 | /* simple copy */ |
88 | id_ns->mssrl = cpu_to_le16(ns->params.mssrl); | |
89 | id_ns->mcl = cpu_to_le32(ns->params.mcl); | |
90 | id_ns->msrc = ns->params.msrc; | |
6870cfb8 | 91 | id_ns->eui64 = cpu_to_be64(ns->params.eui64); |
516990f4 | 92 | |
6a674bc2 MI |
93 | ds = 31 - clz32(ns->blkconf.logical_block_size); |
94 | ms = ns->params.ms; | |
7f0f1ace | 95 | |
421a3092 | 96 | id_ns->mc = NVME_ID_NS_MC_EXTENDED | NVME_ID_NS_MC_SEPARATE; |
bc3a65e9 | 97 | |
421a3092 GA |
98 | if (ms && ns->params.mset) { |
99 | id_ns->flbas |= NVME_ID_NS_FLBAS_EXTENDED; | |
100 | } | |
146f720c | 101 | |
421a3092 GA |
102 | id_ns->dpc = 0x1f; |
103 | id_ns->dps = ns->params.pi; | |
104 | if (ns->params.pi && ns->params.pil) { | |
105 | id_ns->dps |= NVME_ID_NS_DPS_FIRST_EIGHT; | |
6a674bc2 MI |
106 | } |
107 | ||
44219b60 NN |
108 | ns->pif = ns->params.pif; |
109 | ||
f193d0bd | 110 | static const NvmeLBAF defaults[16] = { |
421a3092 GA |
111 | [0] = { .ds = 9 }, |
112 | [1] = { .ds = 9, .ms = 8 }, | |
113 | [2] = { .ds = 9, .ms = 16 }, | |
114 | [3] = { .ds = 9, .ms = 64 }, | |
115 | [4] = { .ds = 12 }, | |
116 | [5] = { .ds = 12, .ms = 8 }, | |
117 | [6] = { .ds = 12, .ms = 16 }, | |
118 | [7] = { .ds = 12, .ms = 64 }, | |
119 | }; | |
120 | ||
763c05df NN |
121 | ns->nlbaf = 8; |
122 | ||
f193d0bd | 123 | memcpy(&id_ns->lbaf, &defaults, sizeof(defaults)); |
421a3092 | 124 | |
763c05df | 125 | for (i = 0; i < ns->nlbaf; i++) { |
6a674bc2 MI |
126 | NvmeLBAF *lbaf = &id_ns->lbaf[i]; |
127 | if (lbaf->ds == ds) { | |
128 | if (lbaf->ms == ms) { | |
129 | id_ns->flbas |= i; | |
130 | goto lbaf_found; | |
131 | } | |
132 | } | |
bc3a65e9 KJ |
133 | } |
134 | ||
6a674bc2 | 135 | /* add non-standard lba format */ |
763c05df NN |
136 | id_ns->lbaf[ns->nlbaf].ds = ds; |
137 | id_ns->lbaf[ns->nlbaf].ms = ms; | |
138 | ns->nlbaf++; | |
139 | ||
140 | id_ns->flbas |= i; | |
6a674bc2 | 141 | |
44219b60 | 142 | |
6a674bc2 | 143 | lbaf_found: |
44219b60 | 144 | id_ns_nvm->elbaf[i] = (ns->pif & 0x3) << 7; |
763c05df | 145 | id_ns->nlbaf = ns->nlbaf - 1; |
516990f4 | 146 | nvme_ns_init_format(ns); |
e4e430b3 | 147 | |
2605257a | 148 | return 0; |
7f0f1ace KJ |
149 | } |
150 | ||
337ccd76 | 151 | static int nvme_ns_init_blk(NvmeNamespace *ns, Error **errp) |
7f0f1ace | 152 | { |
86b1cf32 KW |
153 | bool read_only; |
154 | ||
7f0f1ace KJ |
155 | if (!blkconf_blocksizes(&ns->blkconf, errp)) { |
156 | return -1; | |
157 | } | |
158 | ||
86b1cf32 KW |
159 | read_only = !blk_supports_write_perm(ns->blkconf.blk); |
160 | if (!blkconf_apply_backend_options(&ns->blkconf, read_only, false, errp)) { | |
7f0f1ace KJ |
161 | return -1; |
162 | } | |
163 | ||
2605257a KJ |
164 | if (ns->blkconf.discard_granularity == -1) { |
165 | ns->blkconf.discard_granularity = | |
166 | MAX(ns->blkconf.logical_block_size, MIN_DISCARD_GRANULARITY); | |
167 | } | |
168 | ||
7f0f1ace KJ |
169 | ns->size = blk_getlength(ns->blkconf.blk); |
170 | if (ns->size < 0) { | |
171 | error_setg_errno(errp, -ns->size, "could not get blockdev size"); | |
172 | return -1; | |
173 | } | |
174 | ||
7f0f1ace KJ |
175 | return 0; |
176 | } | |
177 | ||
a479335b DF |
178 | static int nvme_ns_zoned_check_calc_geometry(NvmeNamespace *ns, Error **errp) |
179 | { | |
180 | uint64_t zone_size, zone_cap; | |
a479335b DF |
181 | |
182 | /* Make sure that the values of ZNS properties are sane */ | |
183 | if (ns->params.zone_size_bs) { | |
184 | zone_size = ns->params.zone_size_bs; | |
185 | } else { | |
186 | zone_size = NVME_DEFAULT_ZONE_SIZE; | |
187 | } | |
188 | if (ns->params.zone_cap_bs) { | |
189 | zone_cap = ns->params.zone_cap_bs; | |
190 | } else { | |
191 | zone_cap = zone_size; | |
192 | } | |
193 | if (zone_cap > zone_size) { | |
194 | error_setg(errp, "zone capacity %"PRIu64"B exceeds " | |
195 | "zone size %"PRIu64"B", zone_cap, zone_size); | |
196 | return -1; | |
197 | } | |
6146f3dd | 198 | if (zone_size < ns->lbasz) { |
a479335b | 199 | error_setg(errp, "zone size %"PRIu64"B too small, " |
6146f3dd | 200 | "must be at least %zuB", zone_size, ns->lbasz); |
a479335b DF |
201 | return -1; |
202 | } | |
6146f3dd | 203 | if (zone_cap < ns->lbasz) { |
a479335b | 204 | error_setg(errp, "zone capacity %"PRIu64"B too small, " |
6146f3dd | 205 | "must be at least %zuB", zone_cap, ns->lbasz); |
a479335b DF |
206 | return -1; |
207 | } | |
208 | ||
209 | /* | |
210 | * Save the main zone geometry values to avoid | |
211 | * calculating them later again. | |
212 | */ | |
6146f3dd KJ |
213 | ns->zone_size = zone_size / ns->lbasz; |
214 | ns->zone_capacity = zone_cap / ns->lbasz; | |
215 | ns->num_zones = le64_to_cpu(ns->id_ns.nsze) / ns->zone_size; | |
8d18ddcd DF |
216 | |
217 | /* Do a few more sanity checks of ZNS properties */ | |
044f1876 MI |
218 | if (!ns->num_zones) { |
219 | error_setg(errp, | |
220 | "insufficient drive capacity, must be at least the size " | |
221 | "of one zone (%"PRIu64"B)", zone_size); | |
222 | return -1; | |
223 | } | |
224 | ||
a479335b DF |
225 | return 0; |
226 | } | |
227 | ||
228 | static void nvme_ns_zoned_init_state(NvmeNamespace *ns) | |
229 | { | |
230 | uint64_t start = 0, zone_size = ns->zone_size; | |
231 | uint64_t capacity = ns->num_zones * zone_size; | |
232 | NvmeZone *zone; | |
233 | int i; | |
234 | ||
235 | ns->zone_array = g_new0(NvmeZone, ns->num_zones); | |
1a9290ad DF |
236 | if (ns->params.zd_extension_size) { |
237 | ns->zd_extensions = g_malloc0(ns->params.zd_extension_size * | |
238 | ns->num_zones); | |
239 | } | |
a479335b DF |
240 | |
241 | QTAILQ_INIT(&ns->exp_open_zones); | |
242 | QTAILQ_INIT(&ns->imp_open_zones); | |
243 | QTAILQ_INIT(&ns->closed_zones); | |
244 | QTAILQ_INIT(&ns->full_zones); | |
245 | ||
246 | zone = ns->zone_array; | |
247 | for (i = 0; i < ns->num_zones; i++, zone++) { | |
248 | if (start + zone_size > capacity) { | |
249 | zone_size = capacity - start; | |
250 | } | |
251 | zone->d.zt = NVME_ZONE_TYPE_SEQ_WRITE; | |
252 | nvme_set_zone_state(zone, NVME_ZONE_STATE_EMPTY); | |
253 | zone->d.za = 0; | |
254 | zone->d.zcap = ns->zone_capacity; | |
255 | zone->d.zslba = start; | |
256 | zone->d.wp = start; | |
257 | zone->w_ptr = start; | |
258 | start += zone_size; | |
259 | } | |
260 | ||
261 | ns->zone_size_log2 = 0; | |
262 | if (is_power_of_2(ns->zone_size)) { | |
263 | ns->zone_size_log2 = 63 - clz64(ns->zone_size); | |
264 | } | |
265 | } | |
266 | ||
6a674bc2 | 267 | static void nvme_ns_init_zoned(NvmeNamespace *ns) |
a479335b DF |
268 | { |
269 | NvmeIdNsZoned *id_ns_z; | |
6a674bc2 | 270 | int i; |
a479335b DF |
271 | |
272 | nvme_ns_zoned_init_state(ns); | |
273 | ||
b21e2380 | 274 | id_ns_z = g_new0(NvmeIdNsZoned, 1); |
a479335b | 275 | |
312c3531 | 276 | /* MAR/MOR are zeroes-based, FFFFFFFFFh means no limit */ |
8d18ddcd DF |
277 | id_ns_z->mar = cpu_to_le32(ns->params.max_active_zones - 1); |
278 | id_ns_z->mor = cpu_to_le32(ns->params.max_open_zones - 1); | |
a479335b | 279 | id_ns_z->zoc = 0; |
25872031 KJ |
280 | id_ns_z->ozcs = ns->params.cross_zone_read ? |
281 | NVME_ID_NS_ZONED_OZCS_RAZB : 0x00; | |
a479335b | 282 | |
6a674bc2 MI |
283 | for (i = 0; i <= ns->id_ns.nlbaf; i++) { |
284 | id_ns_z->lbafe[i].zsze = cpu_to_le64(ns->zone_size); | |
285 | id_ns_z->lbafe[i].zdes = | |
286 | ns->params.zd_extension_size >> 6; /* Units of 64B */ | |
287 | } | |
a479335b | 288 | |
e321b4cd KJ |
289 | if (ns->params.zrwas) { |
290 | ns->zns.numzrwa = ns->params.numzrwa ? | |
291 | ns->params.numzrwa : ns->num_zones; | |
292 | ||
293 | ns->zns.zrwas = ns->params.zrwas >> ns->lbaf.ds; | |
294 | ns->zns.zrwafg = ns->params.zrwafg >> ns->lbaf.ds; | |
295 | ||
296 | id_ns_z->ozcs |= NVME_ID_NS_ZONED_OZCS_ZRWASUP; | |
297 | id_ns_z->zrwacap = NVME_ID_NS_ZONED_ZRWACAP_EXPFLUSHSUP; | |
298 | ||
299 | id_ns_z->numzrwa = cpu_to_le32(ns->params.numzrwa); | |
300 | id_ns_z->zrwas = cpu_to_le16(ns->zns.zrwas); | |
301 | id_ns_z->zrwafg = cpu_to_le16(ns->zns.zrwafg); | |
302 | } | |
303 | ||
304 | id_ns_z->ozcs = cpu_to_le16(id_ns_z->ozcs); | |
305 | ||
a479335b DF |
306 | ns->csi = NVME_CSI_ZONED; |
307 | ns->id_ns.nsze = cpu_to_le64(ns->num_zones * ns->zone_size); | |
308 | ns->id_ns.ncap = ns->id_ns.nsze; | |
309 | ns->id_ns.nuse = ns->id_ns.ncap; | |
310 | ||
1b5804a8 KJ |
311 | /* |
312 | * The device uses the BDRV_BLOCK_ZERO flag to determine the "deallocated" | |
313 | * status of logical blocks. Since the spec defines that logical blocks | |
314 | * SHALL be deallocated when then zone is in the Empty or Offline states, | |
315 | * we can only support DULBE if the zone size is a multiple of the | |
316 | * calculated NPDG. | |
317 | */ | |
318 | if (ns->zone_size % (ns->id_ns.npdg + 1)) { | |
319 | warn_report("the zone size (%"PRIu64" blocks) is not a multiple of " | |
320 | "the calculated deallocation granularity (%d blocks); " | |
321 | "DULBE support disabled", | |
322 | ns->zone_size, ns->id_ns.npdg + 1); | |
323 | ||
324 | ns->id_ns.nsfeat &= ~0x4; | |
325 | } | |
326 | ||
a479335b DF |
327 | ns->id_ns_zoned = id_ns_z; |
328 | } | |
329 | ||
330 | static void nvme_clear_zone(NvmeNamespace *ns, NvmeZone *zone) | |
331 | { | |
332 | uint8_t state; | |
333 | ||
334 | zone->w_ptr = zone->d.wp; | |
335 | state = nvme_get_zone_state(zone); | |
1a9290ad DF |
336 | if (zone->d.wp != zone->d.zslba || |
337 | (zone->d.za & NVME_ZA_ZD_EXT_VALID)) { | |
a479335b DF |
338 | if (state != NVME_ZONE_STATE_CLOSED) { |
339 | trace_pci_nvme_clear_ns_close(state, zone->d.zslba); | |
340 | nvme_set_zone_state(zone, NVME_ZONE_STATE_CLOSED); | |
341 | } | |
8d18ddcd | 342 | nvme_aor_inc_active(ns); |
a479335b DF |
343 | QTAILQ_INSERT_HEAD(&ns->closed_zones, zone, entry); |
344 | } else { | |
345 | trace_pci_nvme_clear_ns_reset(state, zone->d.zslba); | |
e321b4cd KJ |
346 | if (zone->d.za & NVME_ZA_ZRWA_VALID) { |
347 | zone->d.za &= ~NVME_ZA_ZRWA_VALID; | |
348 | ns->zns.numzrwa++; | |
349 | } | |
a479335b DF |
350 | nvme_set_zone_state(zone, NVME_ZONE_STATE_EMPTY); |
351 | } | |
352 | } | |
353 | ||
354 | /* | |
355 | * Close all the zones that are currently open. | |
356 | */ | |
357 | static void nvme_zoned_ns_shutdown(NvmeNamespace *ns) | |
358 | { | |
359 | NvmeZone *zone, *next; | |
360 | ||
361 | QTAILQ_FOREACH_SAFE(zone, &ns->closed_zones, entry, next) { | |
362 | QTAILQ_REMOVE(&ns->closed_zones, zone, entry); | |
8d18ddcd | 363 | nvme_aor_dec_active(ns); |
a479335b DF |
364 | nvme_clear_zone(ns, zone); |
365 | } | |
366 | QTAILQ_FOREACH_SAFE(zone, &ns->imp_open_zones, entry, next) { | |
367 | QTAILQ_REMOVE(&ns->imp_open_zones, zone, entry); | |
8d18ddcd DF |
368 | nvme_aor_dec_open(ns); |
369 | nvme_aor_dec_active(ns); | |
a479335b DF |
370 | nvme_clear_zone(ns, zone); |
371 | } | |
372 | QTAILQ_FOREACH_SAFE(zone, &ns->exp_open_zones, entry, next) { | |
373 | QTAILQ_REMOVE(&ns->exp_open_zones, zone, entry); | |
8d18ddcd DF |
374 | nvme_aor_dec_open(ns); |
375 | nvme_aor_dec_active(ns); | |
a479335b DF |
376 | nvme_clear_zone(ns, zone); |
377 | } | |
8d18ddcd DF |
378 | |
379 | assert(ns->nr_open_zones == 0); | |
a479335b DF |
380 | } |
381 | ||
73064edf JD |
382 | static NvmeRuHandle *nvme_find_ruh_by_attr(NvmeEnduranceGroup *endgrp, |
383 | uint8_t ruha, uint16_t *ruhid) | |
384 | { | |
385 | for (uint16_t i = 0; i < endgrp->fdp.nruh; i++) { | |
386 | NvmeRuHandle *ruh = &endgrp->fdp.ruhs[i]; | |
387 | ||
388 | if (ruh->ruha == ruha) { | |
389 | *ruhid = i; | |
390 | return ruh; | |
391 | } | |
392 | } | |
393 | ||
394 | return NULL; | |
395 | } | |
396 | ||
397 | static bool nvme_ns_init_fdp(NvmeNamespace *ns, Error **errp) | |
398 | { | |
399 | NvmeEnduranceGroup *endgrp = ns->endgrp; | |
400 | NvmeRuHandle *ruh; | |
401 | uint8_t lbafi = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas); | |
cb16e5c7 | 402 | g_autofree unsigned int *ruhids = NULL; |
ce801773 KJ |
403 | unsigned int n, m, *ruhid; |
404 | const char *endptr, *token; | |
405 | char *r, *p; | |
73064edf JD |
406 | uint16_t *ph; |
407 | ||
408 | if (!ns->params.fdp.ruhs) { | |
409 | ns->fdp.nphs = 1; | |
410 | ph = ns->fdp.phs = g_new(uint16_t, 1); | |
411 | ||
412 | ruh = nvme_find_ruh_by_attr(endgrp, NVME_RUHA_CTRL, ph); | |
413 | if (!ruh) { | |
414 | ruh = nvme_find_ruh_by_attr(endgrp, NVME_RUHA_UNUSED, ph); | |
415 | if (!ruh) { | |
416 | error_setg(errp, "no unused reclaim unit handles left"); | |
417 | return false; | |
418 | } | |
419 | ||
420 | ruh->ruha = NVME_RUHA_CTRL; | |
421 | ruh->lbafi = lbafi; | |
422 | ruh->ruamw = endgrp->fdp.runs >> ns->lbaf.ds; | |
423 | ||
424 | for (uint16_t rg = 0; rg < endgrp->fdp.nrg; rg++) { | |
425 | ruh->rus[rg].ruamw = ruh->ruamw; | |
426 | } | |
427 | } else if (ruh->lbafi != lbafi) { | |
428 | error_setg(errp, "lba format index of controller assigned " | |
429 | "reclaim unit handle does not match namespace lba " | |
430 | "format index"); | |
431 | return false; | |
432 | } | |
433 | ||
434 | return true; | |
435 | } | |
436 | ||
437 | ruhid = ruhids = g_new0(unsigned int, endgrp->fdp.nruh); | |
438 | r = p = strdup(ns->params.fdp.ruhs); | |
439 | ||
440 | /* parse the placement handle identifiers */ | |
441 | while ((token = qemu_strsep(&p, ";")) != NULL) { | |
ce801773 KJ |
442 | if (qemu_strtoui(token, &endptr, 0, &n) < 0) { |
443 | error_setg(errp, "cannot parse reclaim unit handle identifier"); | |
73064edf JD |
444 | free(r); |
445 | return false; | |
446 | } | |
447 | ||
ce801773 KJ |
448 | m = n; |
449 | ||
450 | /* parse range */ | |
451 | if (*endptr == '-') { | |
452 | token = endptr + 1; | |
453 | ||
454 | if (qemu_strtoui(token, NULL, 0, &m) < 0) { | |
455 | error_setg(errp, "cannot parse reclaim unit handle identifier"); | |
456 | free(r); | |
457 | return false; | |
458 | } | |
459 | ||
460 | if (m < n) { | |
461 | error_setg(errp, "invalid reclaim unit handle identifier range"); | |
462 | free(r); | |
463 | return false; | |
464 | } | |
465 | } | |
466 | ||
467 | for (; n <= m; n++) { | |
468 | if (ns->fdp.nphs++ == endgrp->fdp.nruh) { | |
469 | error_setg(errp, "too many placement handles"); | |
470 | free(r); | |
471 | return false; | |
472 | } | |
473 | ||
474 | *ruhid++ = n; | |
73064edf JD |
475 | } |
476 | } | |
477 | ||
478 | free(r); | |
479 | ||
94fa8ca7 KJ |
480 | /* verify that the ruhids are unique */ |
481 | for (unsigned int i = 0; i < ns->fdp.nphs; i++) { | |
482 | for (unsigned int j = i + 1; j < ns->fdp.nphs; j++) { | |
483 | if (ruhids[i] == ruhids[j]) { | |
484 | error_setg(errp, "duplicate reclaim unit handle identifier: %u", | |
485 | ruhids[i]); | |
486 | return false; | |
487 | } | |
488 | } | |
489 | } | |
490 | ||
73064edf JD |
491 | ph = ns->fdp.phs = g_new(uint16_t, ns->fdp.nphs); |
492 | ||
493 | ruhid = ruhids; | |
494 | ||
495 | /* verify the identifiers */ | |
496 | for (unsigned int i = 0; i < ns->fdp.nphs; i++, ruhid++, ph++) { | |
497 | if (*ruhid >= endgrp->fdp.nruh) { | |
498 | error_setg(errp, "invalid reclaim unit handle identifier"); | |
499 | return false; | |
500 | } | |
501 | ||
502 | ruh = &endgrp->fdp.ruhs[*ruhid]; | |
503 | ||
504 | switch (ruh->ruha) { | |
505 | case NVME_RUHA_UNUSED: | |
506 | ruh->ruha = NVME_RUHA_HOST; | |
507 | ruh->lbafi = lbafi; | |
508 | ruh->ruamw = endgrp->fdp.runs >> ns->lbaf.ds; | |
509 | ||
510 | for (uint16_t rg = 0; rg < endgrp->fdp.nrg; rg++) { | |
511 | ruh->rus[rg].ruamw = ruh->ruamw; | |
512 | } | |
513 | ||
514 | break; | |
515 | ||
516 | case NVME_RUHA_HOST: | |
517 | if (ruh->lbafi != lbafi) { | |
518 | error_setg(errp, "lba format index of host assigned" | |
519 | "reclaim unit handle does not match namespace " | |
520 | "lba format index"); | |
521 | return false; | |
522 | } | |
523 | ||
524 | break; | |
525 | ||
526 | case NVME_RUHA_CTRL: | |
527 | error_setg(errp, "reclaim unit handle is controller assigned"); | |
528 | return false; | |
529 | ||
530 | default: | |
531 | abort(); | |
532 | } | |
533 | ||
534 | *ph = *ruhid; | |
535 | } | |
536 | ||
537 | return true; | |
538 | } | |
539 | ||
5e4f6bcc | 540 | static int nvme_ns_check_constraints(NvmeNamespace *ns, Error **errp) |
7f0f1ace | 541 | { |
44219b60 NN |
542 | unsigned int pi_size; |
543 | ||
7f0f1ace KJ |
544 | if (!ns->blkconf.blk) { |
545 | error_setg(errp, "block backend not configured"); | |
546 | return -1; | |
547 | } | |
548 | ||
44219b60 NN |
549 | if (ns->params.pi) { |
550 | if (ns->params.pi > NVME_ID_NS_DPS_TYPE_3) { | |
551 | error_setg(errp, "invalid 'pi' value"); | |
552 | return -1; | |
553 | } | |
554 | ||
555 | switch (ns->params.pif) { | |
556 | case NVME_PI_GUARD_16: | |
557 | pi_size = 8; | |
558 | break; | |
559 | case NVME_PI_GUARD_64: | |
560 | pi_size = 16; | |
561 | break; | |
562 | default: | |
563 | error_setg(errp, "invalid 'pif'"); | |
564 | return -1; | |
565 | } | |
566 | ||
567 | if (ns->params.ms < pi_size) { | |
568 | error_setg(errp, "at least %u bytes of metadata required to " | |
569 | "enable protection information", pi_size); | |
570 | return -1; | |
571 | } | |
146f720c KJ |
572 | } |
573 | ||
e5489356 KJ |
574 | if (ns->params.nsid > NVME_MAX_NAMESPACES) { |
575 | error_setg(errp, "invalid namespace id (must be between 0 and %d)", | |
576 | NVME_MAX_NAMESPACES); | |
577 | return -1; | |
578 | } | |
579 | ||
73064edf JD |
580 | if (ns->params.zoned && ns->endgrp && ns->endgrp->fdp.enabled) { |
581 | error_setg(errp, "cannot be a zoned- in an FDP configuration"); | |
582 | return -1; | |
583 | } | |
584 | ||
49ad39c5 KJ |
585 | if (ns->params.zoned) { |
586 | if (ns->params.max_active_zones) { | |
587 | if (ns->params.max_open_zones > ns->params.max_active_zones) { | |
588 | error_setg(errp, "max_open_zones (%u) exceeds " | |
589 | "max_active_zones (%u)", ns->params.max_open_zones, | |
590 | ns->params.max_active_zones); | |
591 | return -1; | |
592 | } | |
593 | ||
594 | if (!ns->params.max_open_zones) { | |
595 | ns->params.max_open_zones = ns->params.max_active_zones; | |
596 | } | |
597 | } | |
598 | ||
599 | if (ns->params.zd_extension_size) { | |
600 | if (ns->params.zd_extension_size & 0x3f) { | |
601 | error_setg(errp, "zone descriptor extension size must be a " | |
602 | "multiple of 64B"); | |
603 | return -1; | |
604 | } | |
605 | if ((ns->params.zd_extension_size >> 6) > 0xff) { | |
606 | error_setg(errp, | |
607 | "zone descriptor extension size is too large"); | |
608 | return -1; | |
609 | } | |
610 | } | |
e321b4cd KJ |
611 | |
612 | if (ns->params.zrwas) { | |
613 | if (ns->params.zrwas % ns->blkconf.logical_block_size) { | |
614 | error_setg(errp, "zone random write area size (zoned.zrwas " | |
615 | "%"PRIu64") must be a multiple of the logical " | |
616 | "block size (logical_block_size %"PRIu32")", | |
617 | ns->params.zrwas, ns->blkconf.logical_block_size); | |
618 | return -1; | |
619 | } | |
620 | ||
621 | if (ns->params.zrwafg == -1) { | |
622 | ns->params.zrwafg = ns->blkconf.logical_block_size; | |
623 | } | |
624 | ||
625 | if (ns->params.zrwas % ns->params.zrwafg) { | |
626 | error_setg(errp, "zone random write area size (zoned.zrwas " | |
627 | "%"PRIu64") must be a multiple of the zone random " | |
628 | "write area flush granularity (zoned.zrwafg, " | |
629 | "%"PRIu64")", ns->params.zrwas, ns->params.zrwafg); | |
630 | return -1; | |
631 | } | |
632 | ||
633 | if (ns->params.max_active_zones) { | |
634 | if (ns->params.numzrwa > ns->params.max_active_zones) { | |
635 | error_setg(errp, "number of zone random write area " | |
636 | "resources (zoned.numzrwa, %d) must be less " | |
637 | "than or equal to maximum active resources " | |
638 | "(zoned.max_active_zones, %d)", | |
639 | ns->params.numzrwa, | |
640 | ns->params.max_active_zones); | |
641 | return -1; | |
642 | } | |
643 | } | |
644 | } | |
49ad39c5 KJ |
645 | } |
646 | ||
7f0f1ace KJ |
647 | return 0; |
648 | } | |
649 | ||
5e4f6bcc | 650 | int nvme_ns_setup(NvmeNamespace *ns, Error **errp) |
7f0f1ace | 651 | { |
5e4f6bcc | 652 | if (nvme_ns_check_constraints(ns, errp)) { |
7f0f1ace KJ |
653 | return -1; |
654 | } | |
655 | ||
337ccd76 | 656 | if (nvme_ns_init_blk(ns, errp)) { |
7f0f1ace KJ |
657 | return -1; |
658 | } | |
659 | ||
2605257a KJ |
660 | if (nvme_ns_init(ns, errp)) { |
661 | return -1; | |
662 | } | |
a479335b DF |
663 | if (ns->params.zoned) { |
664 | if (nvme_ns_zoned_check_calc_geometry(ns, errp) != 0) { | |
665 | return -1; | |
666 | } | |
6a674bc2 | 667 | nvme_ns_init_zoned(ns); |
a479335b | 668 | } |
54064e51 | 669 | |
73064edf JD |
670 | if (ns->endgrp && ns->endgrp->fdp.enabled) { |
671 | if (!nvme_ns_init_fdp(ns, errp)) { | |
672 | return -1; | |
673 | } | |
674 | } | |
675 | ||
7f0f1ace KJ |
676 | return 0; |
677 | } | |
678 | ||
679 | void nvme_ns_drain(NvmeNamespace *ns) | |
680 | { | |
681 | blk_drain(ns->blkconf.blk); | |
682 | } | |
683 | ||
ba69f224 | 684 | void nvme_ns_shutdown(NvmeNamespace *ns) |
7f0f1ace KJ |
685 | { |
686 | blk_flush(ns->blkconf.blk); | |
a479335b DF |
687 | if (ns->params.zoned) { |
688 | nvme_zoned_ns_shutdown(ns); | |
689 | } | |
690 | } | |
691 | ||
692 | void nvme_ns_cleanup(NvmeNamespace *ns) | |
693 | { | |
694 | if (ns->params.zoned) { | |
695 | g_free(ns->id_ns_zoned); | |
696 | g_free(ns->zone_array); | |
1a9290ad | 697 | g_free(ns->zd_extensions); |
a479335b | 698 | } |
73064edf JD |
699 | |
700 | if (ns->endgrp && ns->endgrp->fdp.enabled) { | |
701 | g_free(ns->fdp.phs); | |
702 | } | |
7f0f1ace KJ |
703 | } |
704 | ||
5ffbaeed KJ |
705 | static void nvme_ns_unrealize(DeviceState *dev) |
706 | { | |
707 | NvmeNamespace *ns = NVME_NS(dev); | |
708 | ||
709 | nvme_ns_drain(ns); | |
710 | nvme_ns_shutdown(ns); | |
711 | nvme_ns_cleanup(ns); | |
712 | } | |
713 | ||
7f0f1ace KJ |
714 | static void nvme_ns_realize(DeviceState *dev, Error **errp) |
715 | { | |
716 | NvmeNamespace *ns = NVME_NS(dev); | |
717 | BusState *s = qdev_get_parent_bus(dev); | |
718 | NvmeCtrl *n = NVME(s->parent); | |
e5489356 KJ |
719 | NvmeSubsystem *subsys = n->subsys; |
720 | uint32_t nsid = ns->params.nsid; | |
721 | int i; | |
7f0f1ace | 722 | |
5e4f6bcc | 723 | if (!n->subsys) { |
dfa82ac2 NC |
724 | /* If no subsys, the ns cannot be attached to more than one ctrl. */ |
725 | ns->params.shared = false; | |
5e4f6bcc KJ |
726 | if (ns->params.detached) { |
727 | error_setg(errp, "detached requires that the nvme device is " | |
728 | "linked to an nvme-subsys device"); | |
729 | return; | |
730 | } | |
5ffbaeed KJ |
731 | } else { |
732 | /* | |
733 | * If this namespace belongs to a subsystem (through a link on the | |
734 | * controller device), reparent the device. | |
735 | */ | |
736 | if (!qdev_set_parent_bus(dev, &subsys->bus.parent_bus, errp)) { | |
737 | return; | |
738 | } | |
534a93d3 | 739 | ns->subsys = subsys; |
73064edf | 740 | ns->endgrp = &subsys->endgrp; |
5e4f6bcc KJ |
741 | } |
742 | ||
743 | if (nvme_ns_setup(ns, errp)) { | |
7f0f1ace KJ |
744 | return; |
745 | } | |
15d024d4 | 746 | |
e5489356 KJ |
747 | if (!nsid) { |
748 | for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { | |
749 | if (nvme_ns(n, i) || nvme_subsys_ns(subsys, i)) { | |
750 | continue; | |
751 | } | |
752 | ||
753 | nsid = ns->params.nsid = i; | |
754 | break; | |
755 | } | |
756 | ||
757 | if (!nsid) { | |
758 | error_setg(errp, "no free namespace id"); | |
e5707685 MI |
759 | return; |
760 | } | |
761 | } else { | |
e5489356 KJ |
762 | if (nvme_ns(n, nsid) || nvme_subsys_ns(subsys, nsid)) { |
763 | error_setg(errp, "namespace id '%d' already allocated", nsid); | |
e5707685 MI |
764 | return; |
765 | } | |
15d024d4 | 766 | } |
e5489356 KJ |
767 | |
768 | if (subsys) { | |
769 | subsys->namespaces[nsid] = ns; | |
770 | ||
771dbc3a KJ |
771 | ns->id_ns.endgid = cpu_to_le16(0x1); |
772 | ||
e5489356 KJ |
773 | if (ns->params.detached) { |
774 | return; | |
775 | } | |
776 | ||
777 | if (ns->params.shared) { | |
778 | for (i = 0; i < ARRAY_SIZE(subsys->ctrls); i++) { | |
779 | NvmeCtrl *ctrl = subsys->ctrls[i]; | |
780 | ||
99f48ae7 | 781 | if (ctrl && ctrl != SUBSYS_SLOT_RSVD) { |
e5489356 KJ |
782 | nvme_attach_ns(ctrl, ns); |
783 | } | |
784 | } | |
785 | ||
786 | return; | |
787 | } | |
771dbc3a | 788 | |
e5489356 KJ |
789 | } |
790 | ||
791 | nvme_attach_ns(n, ns); | |
7f0f1ace KJ |
792 | } |
793 | ||
794 | static Property nvme_ns_props[] = { | |
795 | DEFINE_BLOCK_PROPERTIES(NvmeNamespace, blkconf), | |
037953b5 | 796 | DEFINE_PROP_BOOL("detached", NvmeNamespace, params.detached, false), |
916b0f0b | 797 | DEFINE_PROP_BOOL("shared", NvmeNamespace, params.shared, true), |
7f0f1ace | 798 | DEFINE_PROP_UINT32("nsid", NvmeNamespace, params.nsid, 0), |
bd9f371c | 799 | DEFINE_PROP_UUID_NODEFAULT("uuid", NvmeNamespace, params.uuid), |
6870cfb8 | 800 | DEFINE_PROP_UINT64("eui64", NvmeNamespace, params.eui64, 0), |
bc3a65e9 KJ |
801 | DEFINE_PROP_UINT16("ms", NvmeNamespace, params.ms, 0), |
802 | DEFINE_PROP_UINT8("mset", NvmeNamespace, params.mset, 0), | |
146f720c KJ |
803 | DEFINE_PROP_UINT8("pi", NvmeNamespace, params.pi, 0), |
804 | DEFINE_PROP_UINT8("pil", NvmeNamespace, params.pil, 0), | |
44219b60 | 805 | DEFINE_PROP_UINT8("pif", NvmeNamespace, params.pif, 0), |
e4e430b3 KJ |
806 | DEFINE_PROP_UINT16("mssrl", NvmeNamespace, params.mssrl, 128), |
807 | DEFINE_PROP_UINT32("mcl", NvmeNamespace, params.mcl, 128), | |
808 | DEFINE_PROP_UINT8("msrc", NvmeNamespace, params.msrc, 127), | |
a479335b DF |
809 | DEFINE_PROP_BOOL("zoned", NvmeNamespace, params.zoned, false), |
810 | DEFINE_PROP_SIZE("zoned.zone_size", NvmeNamespace, params.zone_size_bs, | |
811 | NVME_DEFAULT_ZONE_SIZE), | |
812 | DEFINE_PROP_SIZE("zoned.zone_capacity", NvmeNamespace, params.zone_cap_bs, | |
813 | 0), | |
814 | DEFINE_PROP_BOOL("zoned.cross_read", NvmeNamespace, | |
815 | params.cross_zone_read, false), | |
8d18ddcd DF |
816 | DEFINE_PROP_UINT32("zoned.max_active", NvmeNamespace, |
817 | params.max_active_zones, 0), | |
818 | DEFINE_PROP_UINT32("zoned.max_open", NvmeNamespace, | |
819 | params.max_open_zones, 0), | |
1a9290ad DF |
820 | DEFINE_PROP_UINT32("zoned.descr_ext_size", NvmeNamespace, |
821 | params.zd_extension_size, 0), | |
e321b4cd KJ |
822 | DEFINE_PROP_UINT32("zoned.numzrwa", NvmeNamespace, params.numzrwa, 0), |
823 | DEFINE_PROP_SIZE("zoned.zrwas", NvmeNamespace, params.zrwas, 0), | |
824 | DEFINE_PROP_SIZE("zoned.zrwafg", NvmeNamespace, params.zrwafg, -1), | |
3276dde4 | 825 | DEFINE_PROP_BOOL("eui64-default", NvmeNamespace, params.eui64_default, |
36d83272 | 826 | false), |
73064edf | 827 | DEFINE_PROP_STRING("fdp.ruhs", NvmeNamespace, params.fdp.ruhs), |
7f0f1ace KJ |
828 | DEFINE_PROP_END_OF_LIST(), |
829 | }; | |
830 | ||
831 | static void nvme_ns_class_init(ObjectClass *oc, void *data) | |
832 | { | |
833 | DeviceClass *dc = DEVICE_CLASS(oc); | |
834 | ||
835 | set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); | |
836 | ||
837 | dc->bus_type = TYPE_NVME_BUS; | |
838 | dc->realize = nvme_ns_realize; | |
5ffbaeed | 839 | dc->unrealize = nvme_ns_unrealize; |
7f0f1ace KJ |
840 | device_class_set_props(dc, nvme_ns_props); |
841 | dc->desc = "Virtual NVMe namespace"; | |
842 | } | |
843 | ||
844 | static void nvme_ns_instance_init(Object *obj) | |
845 | { | |
846 | NvmeNamespace *ns = NVME_NS(obj); | |
847 | char *bootindex = g_strdup_printf("/namespace@%d,0", ns->params.nsid); | |
848 | ||
849 | device_add_bootindex_property(obj, &ns->bootindex, "bootindex", | |
850 | bootindex, DEVICE(obj)); | |
851 | ||
852 | g_free(bootindex); | |
853 | } | |
854 | ||
855 | static const TypeInfo nvme_ns_info = { | |
856 | .name = TYPE_NVME_NS, | |
857 | .parent = TYPE_DEVICE, | |
858 | .class_init = nvme_ns_class_init, | |
859 | .instance_size = sizeof(NvmeNamespace), | |
860 | .instance_init = nvme_ns_instance_init, | |
861 | }; | |
862 | ||
863 | static void nvme_ns_register_types(void) | |
864 | { | |
865 | type_register_static(&nvme_ns_info); | |
866 | } | |
867 | ||
868 | type_init(nvme_ns_register_types) |