]> git.proxmox.com Git - mirror_qemu.git/blame - hw/ppc/spapr_nvdimm.c
nvdimm: Reject writing label data to ROM instead of crashing QEMU
[mirror_qemu.git] / hw / ppc / spapr_nvdimm.c
CommitLineData
ee3a71e3
SB
1/*
2 * QEMU PAPR Storage Class Memory Interfaces
3 *
4 * Copyright (c) 2019-2020, IBM Corporation.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24#include "qemu/osdep.h"
b5513584 25#include "qemu/cutils.h"
ee3a71e3
SB
26#include "qapi/error.h"
27#include "hw/ppc/spapr_drc.h"
28#include "hw/ppc/spapr_nvdimm.h"
29#include "hw/mem/nvdimm.h"
30#include "qemu/nvdimm-utils.h"
31#include "hw/ppc/fdt.h"
b5fca656 32#include "qemu/range.h"
f1aa45ff 33#include "hw/ppc/spapr_numa.h"
b5513584
SB
34#include "block/thread-pool.h"
35#include "migration/vmstate.h"
36#include "qemu/pmem.h"
8601b4f1 37#include "hw/qdev-properties.h"
ee3a71e3 38
53d7d7e2
VJ
39/* DIMM health bitmap bitmap indicators. Taken from kernel's papr_scm.c */
40/* SCM device is unable to persist memory contents */
41#define PAPR_PMEM_UNARMED PPC_BIT(0)
42
f93c8f14
SB
43/*
44 * The nvdimm size should be aligned to SCM block size.
45 * The SCM block size should be aligned to SPAPR_MEMORY_BLOCK_SIZE
46 * in order to have SCM regions not to overlap with dimm memory regions.
47 * The SCM devices can have variable block sizes. For now, fixing the
48 * block size to the minimum value.
49 */
50#define SPAPR_MINIMUM_SCM_BLOCK_SIZE SPAPR_MEMORY_BLOCK_SIZE
51
52/* Have an explicit check for alignment */
53QEMU_BUILD_BUG_ON(SPAPR_MINIMUM_SCM_BLOCK_SIZE % SPAPR_MEMORY_BLOCK_SIZE);
54
b5513584
SB
55#define TYPE_SPAPR_NVDIMM "spapr-nvdimm"
56OBJECT_DECLARE_TYPE(SpaprNVDIMMDevice, SPAPRNVDIMMClass, SPAPR_NVDIMM)
57
58struct SPAPRNVDIMMClass {
59 /* private */
60 NVDIMMClass parent_class;
8601b4f1
SB
61
62 /* public */
63 void (*realize)(NVDIMMDevice *dimm, Error **errp);
64 void (*unrealize)(NVDIMMDevice *dimm, Error **errp);
b5513584
SB
65};
66
451c6905 67bool spapr_nvdimm_validate(HotplugHandler *hotplug_dev, NVDIMMDevice *nvdimm,
beb6073f 68 uint64_t size, Error **errp)
ee3a71e3 69{
beb6073f 70 const MachineClass *mc = MACHINE_GET_CLASS(hotplug_dev);
28f5a716 71 const MachineState *ms = MACHINE(hotplug_dev);
8601b4f1
SB
72 PCDIMMDevice *dimm = PC_DIMM(nvdimm);
73 MemoryRegion *mr = host_memory_backend_get_memory(dimm->hostmem);
90d282d0 74 g_autofree char *uuidstr = NULL;
ee3a71e3 75 QemuUUID uuid;
af7084e7 76 int ret;
ee3a71e3 77
beb6073f
DHB
78 if (!mc->nvdimm_supported) {
79 error_setg(errp, "NVDIMM hotplug not supported for this machine");
451c6905 80 return false;
beb6073f
DHB
81 }
82
55810e90 83 if (!ms->nvdimms_state->is_enabled) {
28f5a716 84 error_setg(errp, "nvdimm device found but 'nvdimm=off' was set");
451c6905 85 return false;
28f5a716
DHB
86 }
87
70fc9cb0
DHB
88 if (object_property_get_int(OBJECT(nvdimm), NVDIMM_LABEL_SIZE_PROP,
89 &error_abort) == 0) {
6c0f0cb3 90 error_setg(errp, "PAPR requires NVDIMM devices to have label-size set");
451c6905 91 return false;
70fc9cb0
DHB
92 }
93
ee3a71e3 94 if (size % SPAPR_MINIMUM_SCM_BLOCK_SIZE) {
6c0f0cb3
DG
95 error_setg(errp, "PAPR requires NVDIMM memory size (excluding label)"
96 " to be a multiple of %" PRIu64 "MB",
ee3a71e3 97 SPAPR_MINIMUM_SCM_BLOCK_SIZE / MiB);
451c6905 98 return false;
ee3a71e3
SB
99 }
100
af7084e7
SB
101 uuidstr = object_property_get_str(OBJECT(nvdimm), NVDIMM_UUID_PROP,
102 &error_abort);
103 ret = qemu_uuid_parse(uuidstr, &uuid);
104 g_assert(!ret);
ee3a71e3
SB
105
106 if (qemu_uuid_is_null(&uuid)) {
107 error_setg(errp, "NVDIMM device requires the uuid to be set");
451c6905 108 return false;
ee3a71e3 109 }
451c6905 110
8601b4f1
SB
111 if (object_dynamic_cast(OBJECT(nvdimm), TYPE_SPAPR_NVDIMM) &&
112 (memory_region_get_fd(mr) < 0)) {
113 error_setg(errp, "spapr-nvdimm device requires the "
114 "memdev %s to be of memory-backend-file type",
115 object_get_canonical_path_component(OBJECT(dimm->hostmem)));
116 return false;
117 }
118
451c6905 119 return true;
ee3a71e3
SB
120}
121
122
ea042c53 123void spapr_add_nvdimm(DeviceState *dev, uint64_t slot)
ee3a71e3
SB
124{
125 SpaprDrc *drc;
126 bool hotplugged = spapr_drc_hotplugged(dev);
ee3a71e3
SB
127
128 drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PMEM, slot);
129 g_assert(drc);
130
ea042c53
GK
131 /*
132 * pc_dimm_get_free_slot() provided a free slot at pre-plug. The
133 * corresponding DRC is thus assumed to be attachable.
134 */
bc370a65 135 spapr_drc_attach(drc, dev);
ee3a71e3
SB
136
137 if (hotplugged) {
138 spapr_hotplug_req_add_by_index(drc);
139 }
140}
141
f1aa45ff
DHB
142static int spapr_dt_nvdimm(SpaprMachineState *spapr, void *fdt,
143 int parent_offset, NVDIMMDevice *nvdimm)
ee3a71e3
SB
144{
145 int child_offset;
146 char *buf;
147 SpaprDrc *drc;
148 uint32_t drc_idx;
149 uint32_t node = object_property_get_uint(OBJECT(nvdimm), PC_DIMM_NODE_PROP,
150 &error_abort);
151 uint64_t slot = object_property_get_uint(OBJECT(nvdimm), PC_DIMM_SLOT_PROP,
152 &error_abort);
ee3a71e3
SB
153 uint64_t lsize = nvdimm->label_size;
154 uint64_t size = object_property_get_int(OBJECT(nvdimm), PC_DIMM_SIZE_PROP,
155 NULL);
156
157 drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PMEM, slot);
158 g_assert(drc);
159
160 drc_idx = spapr_drc_index(drc);
161
162 buf = g_strdup_printf("ibm,pmemory@%x", drc_idx);
163 child_offset = fdt_add_subnode(fdt, parent_offset, buf);
164 g_free(buf);
165
166 _FDT(child_offset);
167
168 _FDT((fdt_setprop_cell(fdt, child_offset, "reg", drc_idx)));
169 _FDT((fdt_setprop_string(fdt, child_offset, "compatible", "ibm,pmemory")));
170 _FDT((fdt_setprop_string(fdt, child_offset, "device_type", "ibm,pmemory")));
171
f1aa45ff 172 spapr_numa_write_associativity_dt(spapr, fdt, child_offset, node);
ee3a71e3
SB
173
174 buf = qemu_uuid_unparse_strdup(&nvdimm->uuid);
175 _FDT((fdt_setprop_string(fdt, child_offset, "ibm,unit-guid", buf)));
176 g_free(buf);
177
178 _FDT((fdt_setprop_cell(fdt, child_offset, "ibm,my-drc-index", drc_idx)));
179
180 _FDT((fdt_setprop_u64(fdt, child_offset, "ibm,block-size",
181 SPAPR_MINIMUM_SCM_BLOCK_SIZE)));
182 _FDT((fdt_setprop_u64(fdt, child_offset, "ibm,number-of-blocks",
183 size / SPAPR_MINIMUM_SCM_BLOCK_SIZE)));
184 _FDT((fdt_setprop_cell(fdt, child_offset, "ibm,metadata-size", lsize)));
185
186 _FDT((fdt_setprop_string(fdt, child_offset, "ibm,pmem-application",
187 "operating-system")));
188 _FDT(fdt_setprop(fdt, child_offset, "ibm,cache-flush-required", NULL, 0));
189
8601b4f1
SB
190 if (object_dynamic_cast(OBJECT(nvdimm), TYPE_SPAPR_NVDIMM)) {
191 bool is_pmem = false, pmem_override = false;
192 PCDIMMDevice *dimm = PC_DIMM(nvdimm);
193 HostMemoryBackend *hostmem = dimm->hostmem;
194
195 is_pmem = object_property_get_bool(OBJECT(hostmem), "pmem", NULL);
196 pmem_override = object_property_get_bool(OBJECT(nvdimm),
197 "pmem-override", NULL);
198 if (!is_pmem || pmem_override) {
199 _FDT(fdt_setprop(fdt, child_offset, "ibm,hcall-flush-required",
200 NULL, 0));
201 }
202 }
203
ee3a71e3
SB
204 return child_offset;
205}
206
6ee1d62e
DHB
207int spapr_pmem_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr,
208 void *fdt, int *fdt_start_offset, Error **errp)
209{
210 NVDIMMDevice *nvdimm = NVDIMM(drc->dev);
211
f1aa45ff 212 *fdt_start_offset = spapr_dt_nvdimm(spapr, fdt, 0, nvdimm);
6ee1d62e
DHB
213
214 return 0;
215}
216
f1aa45ff 217void spapr_dt_persistent_memory(SpaprMachineState *spapr, void *fdt)
ee3a71e3 218{
9f9f82da 219 int offset = fdt_subnode_offset(fdt, 0, "ibm,persistent-memory");
ee3a71e3
SB
220 GSList *iter, *nvdimms = nvdimm_get_device_list();
221
222 if (offset < 0) {
9f9f82da 223 offset = fdt_add_subnode(fdt, 0, "ibm,persistent-memory");
ee3a71e3
SB
224 _FDT(offset);
225 _FDT((fdt_setprop_cell(fdt, offset, "#address-cells", 0x1)));
226 _FDT((fdt_setprop_cell(fdt, offset, "#size-cells", 0x0)));
227 _FDT((fdt_setprop_string(fdt, offset, "device_type",
228 "ibm,persistent-memory")));
229 }
230
231 /* Create DT entries for cold plugged NVDIMM devices */
232 for (iter = nvdimms; iter; iter = iter->next) {
233 NVDIMMDevice *nvdimm = iter->data;
234
f1aa45ff 235 spapr_dt_nvdimm(spapr, fdt, offset, nvdimm);
ee3a71e3
SB
236 }
237 g_slist_free(nvdimms);
238
239 return;
240}
b5fca656
SB
241
242static target_ulong h_scm_read_metadata(PowerPCCPU *cpu,
243 SpaprMachineState *spapr,
244 target_ulong opcode,
245 target_ulong *args)
246{
247 uint32_t drc_index = args[0];
248 uint64_t offset = args[1];
249 uint64_t len = args[2];
250 SpaprDrc *drc = spapr_drc_by_index(drc_index);
251 NVDIMMDevice *nvdimm;
252 NVDIMMClass *ddc;
253 uint64_t data = 0;
254 uint8_t buf[8] = { 0 };
255
256 if (!drc || !drc->dev ||
257 spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) {
258 return H_PARAMETER;
259 }
260
261 if (len != 1 && len != 2 &&
262 len != 4 && len != 8) {
263 return H_P3;
264 }
265
266 nvdimm = NVDIMM(drc->dev);
267 if ((offset + len < offset) ||
268 (nvdimm->label_size < len + offset)) {
269 return H_P2;
270 }
271
272 ddc = NVDIMM_GET_CLASS(nvdimm);
273 ddc->read_label_data(nvdimm, buf, len, offset);
274
275 switch (len) {
276 case 1:
277 data = ldub_p(buf);
278 break;
279 case 2:
280 data = lduw_be_p(buf);
281 break;
282 case 4:
283 data = ldl_be_p(buf);
284 break;
285 case 8:
286 data = ldq_be_p(buf);
287 break;
288 default:
289 g_assert_not_reached();
290 }
291
292 args[0] = data;
293
294 return H_SUCCESS;
295}
296
297static target_ulong h_scm_write_metadata(PowerPCCPU *cpu,
298 SpaprMachineState *spapr,
299 target_ulong opcode,
300 target_ulong *args)
301{
302 uint32_t drc_index = args[0];
303 uint64_t offset = args[1];
304 uint64_t data = args[2];
305 uint64_t len = args[3];
306 SpaprDrc *drc = spapr_drc_by_index(drc_index);
307 NVDIMMDevice *nvdimm;
308 NVDIMMClass *ddc;
309 uint8_t buf[8] = { 0 };
310
311 if (!drc || !drc->dev ||
312 spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) {
313 return H_PARAMETER;
314 }
315
316 if (len != 1 && len != 2 &&
317 len != 4 && len != 8) {
318 return H_P4;
319 }
320
321 nvdimm = NVDIMM(drc->dev);
322 if ((offset + len < offset) ||
3a125839
DH
323 (nvdimm->label_size < len + offset) ||
324 nvdimm->readonly) {
b5fca656
SB
325 return H_P2;
326 }
327
328 switch (len) {
329 case 1:
330 if (data & 0xffffffffffffff00) {
331 return H_P2;
332 }
333 stb_p(buf, data);
334 break;
335 case 2:
336 if (data & 0xffffffffffff0000) {
337 return H_P2;
338 }
339 stw_be_p(buf, data);
340 break;
341 case 4:
342 if (data & 0xffffffff00000000) {
343 return H_P2;
344 }
345 stl_be_p(buf, data);
346 break;
347 case 8:
348 stq_be_p(buf, data);
349 break;
350 default:
351 g_assert_not_reached();
352 }
353
354 ddc = NVDIMM_GET_CLASS(nvdimm);
355 ddc->write_label_data(nvdimm, buf, len, offset);
356
357 return H_SUCCESS;
358}
359
360static target_ulong h_scm_bind_mem(PowerPCCPU *cpu, SpaprMachineState *spapr,
361 target_ulong opcode, target_ulong *args)
362{
363 uint32_t drc_index = args[0];
364 uint64_t starting_idx = args[1];
365 uint64_t no_of_scm_blocks_to_bind = args[2];
366 uint64_t target_logical_mem_addr = args[3];
367 uint64_t continue_token = args[4];
368 uint64_t size;
369 uint64_t total_no_of_scm_blocks;
370 SpaprDrc *drc = spapr_drc_by_index(drc_index);
371 hwaddr addr;
372 NVDIMMDevice *nvdimm;
373
374 if (!drc || !drc->dev ||
375 spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) {
376 return H_PARAMETER;
377 }
378
379 /*
380 * Currently continue token should be zero qemu has already bound
381 * everything and this hcall doesnt return H_BUSY.
382 */
383 if (continue_token > 0) {
384 return H_P5;
385 }
386
387 /* Currently qemu assigns the address. */
388 if (target_logical_mem_addr != 0xffffffffffffffff) {
389 return H_OVERLAP;
390 }
391
392 nvdimm = NVDIMM(drc->dev);
393
394 size = object_property_get_uint(OBJECT(nvdimm),
395 PC_DIMM_SIZE_PROP, &error_abort);
396
397 total_no_of_scm_blocks = size / SPAPR_MINIMUM_SCM_BLOCK_SIZE;
398
399 if (starting_idx > total_no_of_scm_blocks) {
400 return H_P2;
401 }
402
403 if (((starting_idx + no_of_scm_blocks_to_bind) < starting_idx) ||
404 ((starting_idx + no_of_scm_blocks_to_bind) > total_no_of_scm_blocks)) {
405 return H_P3;
406 }
407
408 addr = object_property_get_uint(OBJECT(nvdimm),
409 PC_DIMM_ADDR_PROP, &error_abort);
410
411 addr += starting_idx * SPAPR_MINIMUM_SCM_BLOCK_SIZE;
412
413 /* Already bound, Return target logical address in R5 */
414 args[1] = addr;
415 args[2] = no_of_scm_blocks_to_bind;
416
417 return H_SUCCESS;
418}
419
b5513584
SB
420typedef struct SpaprNVDIMMDeviceFlushState {
421 uint64_t continue_token;
422 int64_t hcall_ret;
423 uint32_t drcidx;
424
425 QLIST_ENTRY(SpaprNVDIMMDeviceFlushState) node;
426} SpaprNVDIMMDeviceFlushState;
427
428typedef struct SpaprNVDIMMDevice SpaprNVDIMMDevice;
429struct SpaprNVDIMMDevice {
8601b4f1 430 /* private */
b5513584
SB
431 NVDIMMDevice parent_obj;
432
8601b4f1 433 bool hcall_flush_required;
b5513584
SB
434 uint64_t nvdimm_flush_token;
435 QLIST_HEAD(, SpaprNVDIMMDeviceFlushState) pending_nvdimm_flush_states;
436 QLIST_HEAD(, SpaprNVDIMMDeviceFlushState) completed_nvdimm_flush_states;
8601b4f1
SB
437
438 /* public */
439
440 /*
441 * The 'on' value for this property forced the qemu to enable the hcall
442 * flush for the nvdimm device even if the backend is a pmem
443 */
444 bool pmem_override;
b5513584
SB
445};
446
447static int flush_worker_cb(void *opaque)
448{
449 SpaprNVDIMMDeviceFlushState *state = opaque;
450 SpaprDrc *drc = spapr_drc_by_index(state->drcidx);
edccf661
DHB
451 PCDIMMDevice *dimm;
452 HostMemoryBackend *backend;
453 int backend_fd;
454
455 g_assert(drc != NULL);
456
457 dimm = PC_DIMM(drc->dev);
458 backend = MEMORY_BACKEND(dimm->hostmem);
459 backend_fd = memory_region_get_fd(&backend->mr);
b5513584
SB
460
461 if (object_property_get_bool(OBJECT(backend), "pmem", NULL)) {
462 MemoryRegion *mr = host_memory_backend_get_memory(dimm->hostmem);
463 void *ptr = memory_region_get_ram_ptr(mr);
464 size_t size = object_property_get_uint(OBJECT(dimm), PC_DIMM_SIZE_PROP,
465 NULL);
466
467 /* flush pmem backend */
468 pmem_persist(ptr, size);
469 } else {
470 /* flush raw backing image */
471 if (qemu_fdatasync(backend_fd) < 0) {
472 error_report("papr_scm: Could not sync nvdimm to backend file: %s",
473 strerror(errno));
474 return H_HARDWARE;
475 }
476 }
477
478 return H_SUCCESS;
479}
480
481static void spapr_nvdimm_flush_completion_cb(void *opaque, int hcall_ret)
482{
483 SpaprNVDIMMDeviceFlushState *state = opaque;
484 SpaprDrc *drc = spapr_drc_by_index(state->drcidx);
edccf661
DHB
485 SpaprNVDIMMDevice *s_nvdimm;
486
487 g_assert(drc != NULL);
488
489 s_nvdimm = SPAPR_NVDIMM(drc->dev);
b5513584
SB
490
491 state->hcall_ret = hcall_ret;
492 QLIST_REMOVE(state, node);
493 QLIST_INSERT_HEAD(&s_nvdimm->completed_nvdimm_flush_states, state, node);
494}
495
496static int spapr_nvdimm_flush_post_load(void *opaque, int version_id)
497{
498 SpaprNVDIMMDevice *s_nvdimm = (SpaprNVDIMMDevice *)opaque;
499 SpaprNVDIMMDeviceFlushState *state;
8601b4f1
SB
500 HostMemoryBackend *backend = MEMORY_BACKEND(PC_DIMM(s_nvdimm)->hostmem);
501 bool is_pmem = object_property_get_bool(OBJECT(backend), "pmem", NULL);
502 bool pmem_override = object_property_get_bool(OBJECT(s_nvdimm),
503 "pmem-override", NULL);
504 bool dest_hcall_flush_required = pmem_override || !is_pmem;
505
506 if (!s_nvdimm->hcall_flush_required && dest_hcall_flush_required) {
507 error_report("The file backend for the spapr-nvdimm device %s at "
508 "source is a pmem, use pmem=on and pmem-override=off to "
509 "continue.", DEVICE(s_nvdimm)->id);
510 return -EINVAL;
511 }
512 if (s_nvdimm->hcall_flush_required && !dest_hcall_flush_required) {
513 error_report("The guest expects hcall-flush support for the "
514 "spapr-nvdimm device %s, use pmem_override=on to "
515 "continue.", DEVICE(s_nvdimm)->id);
516 return -EINVAL;
517 }
b5513584
SB
518
519 QLIST_FOREACH(state, &s_nvdimm->pending_nvdimm_flush_states, node) {
aef04fc7 520 thread_pool_submit_aio(flush_worker_cb, state,
b5513584
SB
521 spapr_nvdimm_flush_completion_cb, state);
522 }
523
524 return 0;
525}
526
527static const VMStateDescription vmstate_spapr_nvdimm_flush_state = {
528 .name = "spapr_nvdimm_flush_state",
529 .version_id = 1,
530 .minimum_version_id = 1,
531 .fields = (VMStateField[]) {
532 VMSTATE_UINT64(continue_token, SpaprNVDIMMDeviceFlushState),
533 VMSTATE_INT64(hcall_ret, SpaprNVDIMMDeviceFlushState),
534 VMSTATE_UINT32(drcidx, SpaprNVDIMMDeviceFlushState),
535 VMSTATE_END_OF_LIST()
536 },
537};
538
539const VMStateDescription vmstate_spapr_nvdimm_states = {
540 .name = "spapr_nvdimm_states",
541 .version_id = 1,
542 .minimum_version_id = 1,
543 .post_load = spapr_nvdimm_flush_post_load,
544 .fields = (VMStateField[]) {
8601b4f1 545 VMSTATE_BOOL(hcall_flush_required, SpaprNVDIMMDevice),
b5513584
SB
546 VMSTATE_UINT64(nvdimm_flush_token, SpaprNVDIMMDevice),
547 VMSTATE_QLIST_V(completed_nvdimm_flush_states, SpaprNVDIMMDevice, 1,
548 vmstate_spapr_nvdimm_flush_state,
549 SpaprNVDIMMDeviceFlushState, node),
550 VMSTATE_QLIST_V(pending_nvdimm_flush_states, SpaprNVDIMMDevice, 1,
551 vmstate_spapr_nvdimm_flush_state,
552 SpaprNVDIMMDeviceFlushState, node),
553 VMSTATE_END_OF_LIST()
554 },
555};
556
557/*
558 * Assign a token and reserve it for the new flush state.
559 */
560static SpaprNVDIMMDeviceFlushState *spapr_nvdimm_init_new_flush_state(
561 SpaprNVDIMMDevice *spapr_nvdimm)
562{
563 SpaprNVDIMMDeviceFlushState *state;
564
565 state = g_malloc0(sizeof(*state));
566
567 spapr_nvdimm->nvdimm_flush_token++;
568 /* Token zero is presumed as no job pending. Assert on overflow to zero */
569 g_assert(spapr_nvdimm->nvdimm_flush_token != 0);
570
571 state->continue_token = spapr_nvdimm->nvdimm_flush_token;
572
573 QLIST_INSERT_HEAD(&spapr_nvdimm->pending_nvdimm_flush_states, state, node);
574
575 return state;
576}
577
578/*
579 * spapr_nvdimm_finish_flushes
580 * Waits for all pending flush requests to complete
581 * their execution and free the states
582 */
583void spapr_nvdimm_finish_flushes(void)
584{
585 SpaprNVDIMMDeviceFlushState *state, *next;
586 GSList *list, *nvdimms;
587
588 /*
589 * Called on reset path, the main loop thread which calls
590 * the pending BHs has gotten out running in the reset path,
591 * finally reaching here. Other code path being guest
592 * h_client_architecture_support, thats early boot up.
593 */
594 nvdimms = nvdimm_get_device_list();
595 for (list = nvdimms; list; list = list->next) {
596 NVDIMMDevice *nvdimm = list->data;
597 if (object_dynamic_cast(OBJECT(nvdimm), TYPE_SPAPR_NVDIMM)) {
598 SpaprNVDIMMDevice *s_nvdimm = SPAPR_NVDIMM(nvdimm);
599 while (!QLIST_EMPTY(&s_nvdimm->pending_nvdimm_flush_states)) {
600 aio_poll(qemu_get_aio_context(), true);
601 }
602
603 QLIST_FOREACH_SAFE(state, &s_nvdimm->completed_nvdimm_flush_states,
604 node, next) {
605 QLIST_REMOVE(state, node);
606 g_free(state);
607 }
608 }
609 }
610 g_slist_free(nvdimms);
611}
612
613/*
614 * spapr_nvdimm_get_flush_status
615 * Fetches the status of the hcall worker and returns
616 * H_LONG_BUSY_ORDER_10_MSEC if the worker is still running.
617 */
618static int spapr_nvdimm_get_flush_status(SpaprNVDIMMDevice *s_nvdimm,
619 uint64_t token)
620{
621 SpaprNVDIMMDeviceFlushState *state, *node;
622
623 QLIST_FOREACH(state, &s_nvdimm->pending_nvdimm_flush_states, node) {
624 if (state->continue_token == token) {
625 return H_LONG_BUSY_ORDER_10_MSEC;
626 }
627 }
628
629 QLIST_FOREACH_SAFE(state, &s_nvdimm->completed_nvdimm_flush_states,
630 node, node) {
631 if (state->continue_token == token) {
632 int ret = state->hcall_ret;
633 QLIST_REMOVE(state, node);
634 g_free(state);
635 return ret;
636 }
637 }
638
639 /* If not found in complete list too, invalid token */
640 return H_P2;
641}
642
643/*
644 * H_SCM_FLUSH
645 * Input: drc_index, continue-token
646 * Out: continue-token
647 * Return Value: H_SUCCESS, H_Parameter, H_P2, H_LONG_BUSY_ORDER_10_MSEC,
648 * H_UNSUPPORTED
649 *
650 * Given a DRC Index Flush the data to backend NVDIMM device. The hcall returns
651 * H_LONG_BUSY_ORDER_10_MSEC when the flush takes longer time and the hcall
652 * needs to be issued multiple times in order to be completely serviced. The
653 * continue-token from the output to be passed in the argument list of
654 * subsequent hcalls until the hcall is completely serviced at which point
655 * H_SUCCESS or other error is returned.
656 */
657static target_ulong h_scm_flush(PowerPCCPU *cpu, SpaprMachineState *spapr,
658 target_ulong opcode, target_ulong *args)
659{
660 int ret;
661 uint32_t drc_index = args[0];
662 uint64_t continue_token = args[1];
663 SpaprDrc *drc = spapr_drc_by_index(drc_index);
664 PCDIMMDevice *dimm;
665 HostMemoryBackend *backend = NULL;
666 SpaprNVDIMMDeviceFlushState *state;
b5513584
SB
667 int fd;
668
669 if (!drc || !drc->dev ||
670 spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) {
671 return H_PARAMETER;
672 }
673
674 dimm = PC_DIMM(drc->dev);
8601b4f1
SB
675 if (!object_dynamic_cast(OBJECT(dimm), TYPE_SPAPR_NVDIMM)) {
676 return H_PARAMETER;
677 }
b5513584 678 if (continue_token == 0) {
8601b4f1 679 bool is_pmem = false, pmem_override = false;
b5513584
SB
680 backend = MEMORY_BACKEND(dimm->hostmem);
681 fd = memory_region_get_fd(&backend->mr);
682
683 if (fd < 0) {
684 return H_UNSUPPORTED;
685 }
686
8601b4f1
SB
687 is_pmem = object_property_get_bool(OBJECT(backend), "pmem", NULL);
688 pmem_override = object_property_get_bool(OBJECT(dimm),
689 "pmem-override", NULL);
690 if (is_pmem && !pmem_override) {
691 return H_UNSUPPORTED;
692 }
693
b5513584
SB
694 state = spapr_nvdimm_init_new_flush_state(SPAPR_NVDIMM(dimm));
695 if (!state) {
696 return H_HARDWARE;
697 }
698
699 state->drcidx = drc_index;
700
aef04fc7 701 thread_pool_submit_aio(flush_worker_cb, state,
b5513584
SB
702 spapr_nvdimm_flush_completion_cb, state);
703
704 continue_token = state->continue_token;
705 }
706
707 ret = spapr_nvdimm_get_flush_status(SPAPR_NVDIMM(dimm), continue_token);
708 if (H_IS_LONG_BUSY(ret)) {
709 args[0] = continue_token;
710 }
711
712 return ret;
713}
714
b5fca656
SB
715static target_ulong h_scm_unbind_mem(PowerPCCPU *cpu, SpaprMachineState *spapr,
716 target_ulong opcode, target_ulong *args)
717{
718 uint32_t drc_index = args[0];
719 uint64_t starting_scm_logical_addr = args[1];
720 uint64_t no_of_scm_blocks_to_unbind = args[2];
721 uint64_t continue_token = args[3];
722 uint64_t size_to_unbind;
723 Range blockrange = range_empty;
724 Range nvdimmrange = range_empty;
725 SpaprDrc *drc = spapr_drc_by_index(drc_index);
726 NVDIMMDevice *nvdimm;
727 uint64_t size, addr;
728
729 if (!drc || !drc->dev ||
730 spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) {
731 return H_PARAMETER;
732 }
733
734 /* continue_token should be zero as this hcall doesn't return H_BUSY. */
735 if (continue_token > 0) {
736 return H_P4;
737 }
738
739 /* Check if starting_scm_logical_addr is block aligned */
740 if (!QEMU_IS_ALIGNED(starting_scm_logical_addr,
741 SPAPR_MINIMUM_SCM_BLOCK_SIZE)) {
742 return H_P2;
743 }
744
745 size_to_unbind = no_of_scm_blocks_to_unbind * SPAPR_MINIMUM_SCM_BLOCK_SIZE;
746 if (no_of_scm_blocks_to_unbind == 0 || no_of_scm_blocks_to_unbind !=
747 size_to_unbind / SPAPR_MINIMUM_SCM_BLOCK_SIZE) {
748 return H_P3;
749 }
750
751 nvdimm = NVDIMM(drc->dev);
752 size = object_property_get_int(OBJECT(nvdimm), PC_DIMM_SIZE_PROP,
753 &error_abort);
754 addr = object_property_get_int(OBJECT(nvdimm), PC_DIMM_ADDR_PROP,
755 &error_abort);
756
757 range_init_nofail(&nvdimmrange, addr, size);
758 range_init_nofail(&blockrange, starting_scm_logical_addr, size_to_unbind);
759
760 if (!range_contains_range(&nvdimmrange, &blockrange)) {
761 return H_P3;
762 }
763
764 args[1] = no_of_scm_blocks_to_unbind;
765
766 /* let unplug take care of actual unbind */
767 return H_SUCCESS;
768}
769
770#define H_UNBIND_SCOPE_ALL 0x1
771#define H_UNBIND_SCOPE_DRC 0x2
772
773static target_ulong h_scm_unbind_all(PowerPCCPU *cpu, SpaprMachineState *spapr,
774 target_ulong opcode, target_ulong *args)
775{
776 uint64_t target_scope = args[0];
777 uint32_t drc_index = args[1];
778 uint64_t continue_token = args[2];
779 NVDIMMDevice *nvdimm;
780 uint64_t size;
781 uint64_t no_of_scm_blocks_unbound = 0;
782
783 /* continue_token should be zero as this hcall doesn't return H_BUSY. */
784 if (continue_token > 0) {
785 return H_P4;
786 }
787
788 if (target_scope == H_UNBIND_SCOPE_DRC) {
789 SpaprDrc *drc = spapr_drc_by_index(drc_index);
790
791 if (!drc || !drc->dev ||
792 spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) {
793 return H_P2;
794 }
795
796 nvdimm = NVDIMM(drc->dev);
797 size = object_property_get_int(OBJECT(nvdimm), PC_DIMM_SIZE_PROP,
798 &error_abort);
799
800 no_of_scm_blocks_unbound = size / SPAPR_MINIMUM_SCM_BLOCK_SIZE;
801 } else if (target_scope == H_UNBIND_SCOPE_ALL) {
802 GSList *list, *nvdimms;
803
804 nvdimms = nvdimm_get_device_list();
805 for (list = nvdimms; list; list = list->next) {
806 nvdimm = list->data;
807 size = object_property_get_int(OBJECT(nvdimm), PC_DIMM_SIZE_PROP,
808 &error_abort);
809
810 no_of_scm_blocks_unbound += size / SPAPR_MINIMUM_SCM_BLOCK_SIZE;
811 }
812 g_slist_free(nvdimms);
813 } else {
814 return H_PARAMETER;
815 }
816
817 args[1] = no_of_scm_blocks_unbound;
818
819 /* let unplug take care of actual unbind */
820 return H_SUCCESS;
821}
822
53d7d7e2
VJ
823static target_ulong h_scm_health(PowerPCCPU *cpu, SpaprMachineState *spapr,
824 target_ulong opcode, target_ulong *args)
825{
826
827 NVDIMMDevice *nvdimm;
828 uint64_t hbitmap = 0;
829 uint32_t drc_index = args[0];
830 SpaprDrc *drc = spapr_drc_by_index(drc_index);
831 const uint64_t hbitmap_mask = PAPR_PMEM_UNARMED;
832
833
834 /* Ensure that the drc is valid & is valid PMEM dimm and is plugged in */
835 if (!drc || !drc->dev ||
836 spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) {
837 return H_PARAMETER;
838 }
839
840 nvdimm = NVDIMM(drc->dev);
841
842 /* Update if the nvdimm is unarmed and send its status via health bitmaps */
843 if (object_property_get_bool(OBJECT(nvdimm), NVDIMM_UNARMED_PROP, NULL)) {
844 hbitmap |= PAPR_PMEM_UNARMED;
845 }
846
847 /* Update the out args with health bitmap/mask */
848 args[0] = hbitmap;
849 args[1] = hbitmap_mask;
850
851 return H_SUCCESS;
852}
853
b5fca656
SB
854static void spapr_scm_register_types(void)
855{
856 /* qemu/scm specific hcalls */
857 spapr_register_hypercall(H_SCM_READ_METADATA, h_scm_read_metadata);
858 spapr_register_hypercall(H_SCM_WRITE_METADATA, h_scm_write_metadata);
859 spapr_register_hypercall(H_SCM_BIND_MEM, h_scm_bind_mem);
860 spapr_register_hypercall(H_SCM_UNBIND_MEM, h_scm_unbind_mem);
861 spapr_register_hypercall(H_SCM_UNBIND_ALL, h_scm_unbind_all);
53d7d7e2 862 spapr_register_hypercall(H_SCM_HEALTH, h_scm_health);
b5513584 863 spapr_register_hypercall(H_SCM_FLUSH, h_scm_flush);
b5fca656
SB
864}
865
866type_init(spapr_scm_register_types)
8601b4f1
SB
867
868static void spapr_nvdimm_realize(NVDIMMDevice *dimm, Error **errp)
869{
870 SpaprNVDIMMDevice *s_nvdimm = SPAPR_NVDIMM(dimm);
871 HostMemoryBackend *backend = MEMORY_BACKEND(PC_DIMM(dimm)->hostmem);
872 bool is_pmem = object_property_get_bool(OBJECT(backend), "pmem", NULL);
873 bool pmem_override = object_property_get_bool(OBJECT(dimm), "pmem-override",
874 NULL);
875 if (!is_pmem || pmem_override) {
876 s_nvdimm->hcall_flush_required = true;
877 }
878
879 vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY,
880 &vmstate_spapr_nvdimm_states, dimm);
881}
882
883static void spapr_nvdimm_unrealize(NVDIMMDevice *dimm)
884{
885 vmstate_unregister(NULL, &vmstate_spapr_nvdimm_states, dimm);
886}
887
888static Property spapr_nvdimm_properties[] = {
889#ifdef CONFIG_LIBPMEM
890 DEFINE_PROP_BOOL("pmem-override", SpaprNVDIMMDevice, pmem_override, false),
891#endif
892 DEFINE_PROP_END_OF_LIST(),
893};
894
895static void spapr_nvdimm_class_init(ObjectClass *oc, void *data)
896{
897 DeviceClass *dc = DEVICE_CLASS(oc);
898 NVDIMMClass *nvc = NVDIMM_CLASS(oc);
899
900 nvc->realize = spapr_nvdimm_realize;
901 nvc->unrealize = spapr_nvdimm_unrealize;
902
903 device_class_set_props(dc, spapr_nvdimm_properties);
904}
905
906static void spapr_nvdimm_init(Object *obj)
907{
908 SpaprNVDIMMDevice *s_nvdimm = SPAPR_NVDIMM(obj);
909
910 s_nvdimm->hcall_flush_required = false;
911 QLIST_INIT(&s_nvdimm->pending_nvdimm_flush_states);
912 QLIST_INIT(&s_nvdimm->completed_nvdimm_flush_states);
913}
914
915static TypeInfo spapr_nvdimm_info = {
916 .name = TYPE_SPAPR_NVDIMM,
917 .parent = TYPE_NVDIMM,
918 .class_init = spapr_nvdimm_class_init,
919 .class_size = sizeof(SPAPRNVDIMMClass),
920 .instance_size = sizeof(SpaprNVDIMMDevice),
921 .instance_init = spapr_nvdimm_init,
922};
923
924static void spapr_nvdimm_register_types(void)
925{
926 type_register_static(&spapr_nvdimm_info);
927}
928
929type_init(spapr_nvdimm_register_types)