]>
Commit | Line | Data |
---|---|---|
ee3a71e3 SB |
1 | /* |
2 | * QEMU PAPR Storage Class Memory Interfaces | |
3 | * | |
4 | * Copyright (c) 2019-2020, IBM Corporation. | |
5 | * | |
6 | * Permission is hereby granted, free of charge, to any person obtaining a copy | |
7 | * of this software and associated documentation files (the "Software"), to deal | |
8 | * in the Software without restriction, including without limitation the rights | |
9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
10 | * copies of the Software, and to permit persons to whom the Software is | |
11 | * furnished to do so, subject to the following conditions: | |
12 | * | |
13 | * The above copyright notice and this permission notice shall be included in | |
14 | * all copies or substantial portions of the Software. | |
15 | * | |
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
22 | * THE SOFTWARE. | |
23 | */ | |
24 | #include "qemu/osdep.h" | |
b5513584 | 25 | #include "qemu/cutils.h" |
ee3a71e3 SB |
26 | #include "qapi/error.h" |
27 | #include "hw/ppc/spapr_drc.h" | |
28 | #include "hw/ppc/spapr_nvdimm.h" | |
29 | #include "hw/mem/nvdimm.h" | |
30 | #include "qemu/nvdimm-utils.h" | |
31 | #include "hw/ppc/fdt.h" | |
b5fca656 | 32 | #include "qemu/range.h" |
f1aa45ff | 33 | #include "hw/ppc/spapr_numa.h" |
b5513584 SB |
34 | #include "block/thread-pool.h" |
35 | #include "migration/vmstate.h" | |
36 | #include "qemu/pmem.h" | |
8601b4f1 | 37 | #include "hw/qdev-properties.h" |
ee3a71e3 | 38 | |
53d7d7e2 VJ |
39 | /* DIMM health bitmap indicators. Taken from kernel's papr_scm.c */ |
40 | /* SCM device is unable to persist memory contents */ | |
41 | #define PAPR_PMEM_UNARMED PPC_BIT(0) | |
42 | ||
f93c8f14 SB |
43 | /* |
44 | * The nvdimm size must be aligned to the SCM block size. | |
45 | * The SCM block size must in turn be aligned to SPAPR_MEMORY_BLOCK_SIZE | |
46 | * so that SCM regions do not overlap with dimm memory regions. | |
47 | * SCM devices can have variable block sizes; for now, the block size is | |
48 | * fixed to the minimum value. | |
49 | */ | |
50 | #define SPAPR_MINIMUM_SCM_BLOCK_SIZE SPAPR_MEMORY_BLOCK_SIZE | |
51 | ||
52 | /* Have an explicit check for alignment */ | |
53 | QEMU_BUILD_BUG_ON(SPAPR_MINIMUM_SCM_BLOCK_SIZE % SPAPR_MEMORY_BLOCK_SIZE); | |
54 | ||
b5513584 SB |
55 | #define TYPE_SPAPR_NVDIMM "spapr-nvdimm" |
56 | OBJECT_DECLARE_TYPE(SpaprNVDIMMDevice, SPAPRNVDIMMClass, SPAPR_NVDIMM) | |
57 | ||
58 | struct SPAPRNVDIMMClass { | |
59 | /* private */ | |
60 | NVDIMMClass parent_class; | |
8601b4f1 SB |
61 | |
62 | /* public */ | |
63 | void (*realize)(NVDIMMDevice *dimm, Error **errp); | |
64 | void (*unrealize)(NVDIMMDevice *dimm, Error **errp); | |
b5513584 SB |
65 | }; |
66 | ||
451c6905 | 67 | bool spapr_nvdimm_validate(HotplugHandler *hotplug_dev, NVDIMMDevice *nvdimm, |
beb6073f | 68 | uint64_t size, Error **errp) |
ee3a71e3 | 69 | { |
beb6073f | 70 | const MachineClass *mc = MACHINE_GET_CLASS(hotplug_dev); |
28f5a716 | 71 | const MachineState *ms = MACHINE(hotplug_dev); |
8601b4f1 SB |
72 | PCDIMMDevice *dimm = PC_DIMM(nvdimm); |
73 | MemoryRegion *mr = host_memory_backend_get_memory(dimm->hostmem); | |
90d282d0 | 74 | g_autofree char *uuidstr = NULL; |
ee3a71e3 | 75 | QemuUUID uuid; |
af7084e7 | 76 | int ret; |
ee3a71e3 | 77 | |
beb6073f DHB |
78 | if (!mc->nvdimm_supported) { |
79 | error_setg(errp, "NVDIMM hotplug not supported for this machine"); | |
451c6905 | 80 | return false; |
beb6073f DHB |
81 | } |
82 | ||
55810e90 | 83 | if (!ms->nvdimms_state->is_enabled) { |
28f5a716 | 84 | error_setg(errp, "nvdimm device found but 'nvdimm=off' was set"); |
451c6905 | 85 | return false; |
28f5a716 DHB |
86 | } |
87 | ||
70fc9cb0 DHB |
88 | if (object_property_get_int(OBJECT(nvdimm), NVDIMM_LABEL_SIZE_PROP, |
89 | &error_abort) == 0) { | |
6c0f0cb3 | 90 | error_setg(errp, "PAPR requires NVDIMM devices to have label-size set"); |
451c6905 | 91 | return false; |
70fc9cb0 DHB |
92 | } |
93 | ||
ee3a71e3 | 94 | if (size % SPAPR_MINIMUM_SCM_BLOCK_SIZE) { |
6c0f0cb3 DG |
95 | error_setg(errp, "PAPR requires NVDIMM memory size (excluding label)" |
96 | " to be a multiple of %" PRIu64 "MB", | |
ee3a71e3 | 97 | SPAPR_MINIMUM_SCM_BLOCK_SIZE / MiB); |
451c6905 | 98 | return false; |
ee3a71e3 SB |
99 | } |
100 | ||
af7084e7 SB |
101 | uuidstr = object_property_get_str(OBJECT(nvdimm), NVDIMM_UUID_PROP, |
102 | &error_abort); | |
103 | ret = qemu_uuid_parse(uuidstr, &uuid); | |
104 | g_assert(!ret); | |
ee3a71e3 SB |
105 | |
106 | if (qemu_uuid_is_null(&uuid)) { | |
107 | error_setg(errp, "NVDIMM device requires the uuid to be set"); | |
451c6905 | 108 | return false; |
ee3a71e3 | 109 | } |
451c6905 | 110 | |
8601b4f1 SB |
111 | if (object_dynamic_cast(OBJECT(nvdimm), TYPE_SPAPR_NVDIMM) && |
112 | (memory_region_get_fd(mr) < 0)) { | |
113 | error_setg(errp, "spapr-nvdimm device requires the " | |
114 | "memdev %s to be of memory-backend-file type", | |
115 | object_get_canonical_path_component(OBJECT(dimm->hostmem))); | |
116 | return false; | |
117 | } | |
118 | ||
451c6905 | 119 | return true; |
ee3a71e3 SB |
120 | } |
121 | ||
122 | ||
ea042c53 | 123 | void spapr_add_nvdimm(DeviceState *dev, uint64_t slot) |
ee3a71e3 SB |
124 | { |
125 | SpaprDrc *drc; | |
126 | bool hotplugged = spapr_drc_hotplugged(dev); | |
ee3a71e3 SB |
127 | |
128 | drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PMEM, slot); | |
129 | g_assert(drc); | |
130 | ||
ea042c53 GK |
131 | /* |
132 | * pc_dimm_get_free_slot() provided a free slot at pre-plug. The | |
133 | * corresponding DRC is thus assumed to be attachable. | |
134 | */ | |
bc370a65 | 135 | spapr_drc_attach(drc, dev); |
ee3a71e3 SB |
136 | |
137 | if (hotplugged) { | |
138 | spapr_hotplug_req_add_by_index(drc); | |
139 | } | |
140 | } | |
141 | ||
f1aa45ff DHB |
142 | static int spapr_dt_nvdimm(SpaprMachineState *spapr, void *fdt, |
143 | int parent_offset, NVDIMMDevice *nvdimm) | |
ee3a71e3 SB |
144 | { |
145 | int child_offset; | |
146 | char *buf; | |
147 | SpaprDrc *drc; | |
148 | uint32_t drc_idx; | |
149 | uint32_t node = object_property_get_uint(OBJECT(nvdimm), PC_DIMM_NODE_PROP, | |
150 | &error_abort); | |
151 | uint64_t slot = object_property_get_uint(OBJECT(nvdimm), PC_DIMM_SLOT_PROP, | |
152 | &error_abort); | |
ee3a71e3 SB |
153 | uint64_t lsize = nvdimm->label_size; |
154 | uint64_t size = object_property_get_int(OBJECT(nvdimm), PC_DIMM_SIZE_PROP, | |
155 | NULL); | |
156 | ||
157 | drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PMEM, slot); | |
158 | g_assert(drc); | |
159 | ||
160 | drc_idx = spapr_drc_index(drc); | |
161 | ||
162 | buf = g_strdup_printf("ibm,pmemory@%x", drc_idx); | |
163 | child_offset = fdt_add_subnode(fdt, parent_offset, buf); | |
164 | g_free(buf); | |
165 | ||
166 | _FDT(child_offset); | |
167 | ||
168 | _FDT((fdt_setprop_cell(fdt, child_offset, "reg", drc_idx))); | |
169 | _FDT((fdt_setprop_string(fdt, child_offset, "compatible", "ibm,pmemory"))); | |
170 | _FDT((fdt_setprop_string(fdt, child_offset, "device_type", "ibm,pmemory"))); | |
171 | ||
f1aa45ff | 172 | spapr_numa_write_associativity_dt(spapr, fdt, child_offset, node); |
ee3a71e3 SB |
173 | |
174 | buf = qemu_uuid_unparse_strdup(&nvdimm->uuid); | |
175 | _FDT((fdt_setprop_string(fdt, child_offset, "ibm,unit-guid", buf))); | |
176 | g_free(buf); | |
177 | ||
178 | _FDT((fdt_setprop_cell(fdt, child_offset, "ibm,my-drc-index", drc_idx))); | |
179 | ||
180 | _FDT((fdt_setprop_u64(fdt, child_offset, "ibm,block-size", | |
181 | SPAPR_MINIMUM_SCM_BLOCK_SIZE))); | |
182 | _FDT((fdt_setprop_u64(fdt, child_offset, "ibm,number-of-blocks", | |
183 | size / SPAPR_MINIMUM_SCM_BLOCK_SIZE))); | |
184 | _FDT((fdt_setprop_cell(fdt, child_offset, "ibm,metadata-size", lsize))); | |
185 | ||
186 | _FDT((fdt_setprop_string(fdt, child_offset, "ibm,pmem-application", | |
187 | "operating-system"))); | |
188 | _FDT(fdt_setprop(fdt, child_offset, "ibm,cache-flush-required", NULL, 0)); | |
189 | ||
8601b4f1 SB |
190 | if (object_dynamic_cast(OBJECT(nvdimm), TYPE_SPAPR_NVDIMM)) { |
191 | bool is_pmem = false, pmem_override = false; | |
192 | PCDIMMDevice *dimm = PC_DIMM(nvdimm); | |
193 | HostMemoryBackend *hostmem = dimm->hostmem; | |
194 | ||
195 | is_pmem = object_property_get_bool(OBJECT(hostmem), "pmem", NULL); | |
196 | pmem_override = object_property_get_bool(OBJECT(nvdimm), | |
197 | "pmem-override", NULL); | |
198 | if (!is_pmem || pmem_override) { | |
199 | _FDT(fdt_setprop(fdt, child_offset, "ibm,hcall-flush-required", | |
200 | NULL, 0)); | |
201 | } | |
202 | } | |
203 | ||
ee3a71e3 SB |
204 | return child_offset; |
205 | } | |
206 | ||
6ee1d62e DHB |
207 | int spapr_pmem_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr, |
208 | void *fdt, int *fdt_start_offset, Error **errp) | |
209 | { | |
210 | NVDIMMDevice *nvdimm = NVDIMM(drc->dev); | |
211 | ||
f1aa45ff | 212 | *fdt_start_offset = spapr_dt_nvdimm(spapr, fdt, 0, nvdimm); |
6ee1d62e DHB |
213 | |
214 | return 0; | |
215 | } | |
216 | ||
f1aa45ff | 217 | void spapr_dt_persistent_memory(SpaprMachineState *spapr, void *fdt) |
ee3a71e3 | 218 | { |
9f9f82da | 219 | int offset = fdt_subnode_offset(fdt, 0, "ibm,persistent-memory"); |
ee3a71e3 SB |
220 | GSList *iter, *nvdimms = nvdimm_get_device_list(); |
221 | ||
222 | if (offset < 0) { | |
9f9f82da | 223 | offset = fdt_add_subnode(fdt, 0, "ibm,persistent-memory"); |
ee3a71e3 SB |
224 | _FDT(offset); |
225 | _FDT((fdt_setprop_cell(fdt, offset, "#address-cells", 0x1))); | |
226 | _FDT((fdt_setprop_cell(fdt, offset, "#size-cells", 0x0))); | |
227 | _FDT((fdt_setprop_string(fdt, offset, "device_type", | |
228 | "ibm,persistent-memory"))); | |
229 | } | |
230 | ||
231 | /* Create DT entries for cold plugged NVDIMM devices */ | |
232 | for (iter = nvdimms; iter; iter = iter->next) { | |
233 | NVDIMMDevice *nvdimm = iter->data; | |
234 | ||
f1aa45ff | 235 | spapr_dt_nvdimm(spapr, fdt, offset, nvdimm); |
ee3a71e3 SB |
236 | } |
237 | g_slist_free(nvdimms); | |
238 | ||
239 | return; | |
240 | } | |
b5fca656 SB |
241 | |
242 | static target_ulong h_scm_read_metadata(PowerPCCPU *cpu, | |
243 | SpaprMachineState *spapr, | |
244 | target_ulong opcode, | |
245 | target_ulong *args) | |
246 | { | |
247 | uint32_t drc_index = args[0]; | |
248 | uint64_t offset = args[1]; | |
249 | uint64_t len = args[2]; | |
250 | SpaprDrc *drc = spapr_drc_by_index(drc_index); | |
251 | NVDIMMDevice *nvdimm; | |
252 | NVDIMMClass *ddc; | |
253 | uint64_t data = 0; | |
254 | uint8_t buf[8] = { 0 }; | |
255 | ||
256 | if (!drc || !drc->dev || | |
257 | spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) { | |
258 | return H_PARAMETER; | |
259 | } | |
260 | ||
261 | if (len != 1 && len != 2 && | |
262 | len != 4 && len != 8) { | |
263 | return H_P3; | |
264 | } | |
265 | ||
266 | nvdimm = NVDIMM(drc->dev); | |
267 | if ((offset + len < offset) || | |
268 | (nvdimm->label_size < len + offset)) { | |
269 | return H_P2; | |
270 | } | |
271 | ||
272 | ddc = NVDIMM_GET_CLASS(nvdimm); | |
273 | ddc->read_label_data(nvdimm, buf, len, offset); | |
274 | ||
275 | switch (len) { | |
276 | case 1: | |
277 | data = ldub_p(buf); | |
278 | break; | |
279 | case 2: | |
280 | data = lduw_be_p(buf); | |
281 | break; | |
282 | case 4: | |
283 | data = ldl_be_p(buf); | |
284 | break; | |
285 | case 8: | |
286 | data = ldq_be_p(buf); | |
287 | break; | |
288 | default: | |
289 | g_assert_not_reached(); | |
290 | } | |
291 | ||
292 | args[0] = data; | |
293 | ||
294 | return H_SUCCESS; | |
295 | } | |
296 | ||
297 | static target_ulong h_scm_write_metadata(PowerPCCPU *cpu, | |
298 | SpaprMachineState *spapr, | |
299 | target_ulong opcode, | |
300 | target_ulong *args) | |
301 | { | |
302 | uint32_t drc_index = args[0]; | |
303 | uint64_t offset = args[1]; | |
304 | uint64_t data = args[2]; | |
305 | uint64_t len = args[3]; | |
306 | SpaprDrc *drc = spapr_drc_by_index(drc_index); | |
307 | NVDIMMDevice *nvdimm; | |
308 | NVDIMMClass *ddc; | |
309 | uint8_t buf[8] = { 0 }; | |
310 | ||
311 | if (!drc || !drc->dev || | |
312 | spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) { | |
313 | return H_PARAMETER; | |
314 | } | |
315 | ||
316 | if (len != 1 && len != 2 && | |
317 | len != 4 && len != 8) { | |
318 | return H_P4; | |
319 | } | |
320 | ||
321 | nvdimm = NVDIMM(drc->dev); | |
322 | if ((offset + len < offset) || | |
3a125839 DH |
323 | (nvdimm->label_size < len + offset) || |
324 | nvdimm->readonly) { | |
b5fca656 SB |
325 | return H_P2; |
326 | } | |
327 | ||
328 | switch (len) { | |
329 | case 1: | |
330 | if (data & 0xffffffffffffff00) { | |
331 | return H_P2; | |
332 | } | |
333 | stb_p(buf, data); | |
334 | break; | |
335 | case 2: | |
336 | if (data & 0xffffffffffff0000) { | |
337 | return H_P2; | |
338 | } | |
339 | stw_be_p(buf, data); | |
340 | break; | |
341 | case 4: | |
342 | if (data & 0xffffffff00000000) { | |
343 | return H_P2; | |
344 | } | |
345 | stl_be_p(buf, data); | |
346 | break; | |
347 | case 8: | |
348 | stq_be_p(buf, data); | |
349 | break; | |
350 | default: | |
351 | g_assert_not_reached(); | |
352 | } | |
353 | ||
354 | ddc = NVDIMM_GET_CLASS(nvdimm); | |
355 | ddc->write_label_data(nvdimm, buf, len, offset); | |
356 | ||
357 | return H_SUCCESS; | |
358 | } | |
359 | ||
360 | static target_ulong h_scm_bind_mem(PowerPCCPU *cpu, SpaprMachineState *spapr, | |
361 | target_ulong opcode, target_ulong *args) | |
362 | { | |
363 | uint32_t drc_index = args[0]; | |
364 | uint64_t starting_idx = args[1]; | |
365 | uint64_t no_of_scm_blocks_to_bind = args[2]; | |
366 | uint64_t target_logical_mem_addr = args[3]; | |
367 | uint64_t continue_token = args[4]; | |
368 | uint64_t size; | |
369 | uint64_t total_no_of_scm_blocks; | |
370 | SpaprDrc *drc = spapr_drc_by_index(drc_index); | |
371 | hwaddr addr; | |
372 | NVDIMMDevice *nvdimm; | |
373 | ||
374 | if (!drc || !drc->dev || | |
375 | spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) { | |
376 | return H_PARAMETER; | |
377 | } | |
378 | ||
379 | /* | |
380 | * Currently continue token should be zero qemu has already bound | |
e6a19a64 | 381 | * everything and this hcall doesn't return H_BUSY. |
b5fca656 SB |
382 | */ |
383 | if (continue_token > 0) { | |
384 | return H_P5; | |
385 | } | |
386 | ||
387 | /* Currently qemu assigns the address. */ | |
388 | if (target_logical_mem_addr != 0xffffffffffffffff) { | |
389 | return H_OVERLAP; | |
390 | } | |
391 | ||
392 | nvdimm = NVDIMM(drc->dev); | |
393 | ||
394 | size = object_property_get_uint(OBJECT(nvdimm), | |
395 | PC_DIMM_SIZE_PROP, &error_abort); | |
396 | ||
397 | total_no_of_scm_blocks = size / SPAPR_MINIMUM_SCM_BLOCK_SIZE; | |
398 | ||
399 | if (starting_idx > total_no_of_scm_blocks) { | |
400 | return H_P2; | |
401 | } | |
402 | ||
403 | if (((starting_idx + no_of_scm_blocks_to_bind) < starting_idx) || | |
404 | ((starting_idx + no_of_scm_blocks_to_bind) > total_no_of_scm_blocks)) { | |
405 | return H_P3; | |
406 | } | |
407 | ||
408 | addr = object_property_get_uint(OBJECT(nvdimm), | |
409 | PC_DIMM_ADDR_PROP, &error_abort); | |
410 | ||
411 | addr += starting_idx * SPAPR_MINIMUM_SCM_BLOCK_SIZE; | |
412 | ||
413 | /* Already bound, Return target logical address in R5 */ | |
414 | args[1] = addr; | |
415 | args[2] = no_of_scm_blocks_to_bind; | |
416 | ||
417 | return H_SUCCESS; | |
418 | } | |
419 | ||
b5513584 SB |
420 | typedef struct SpaprNVDIMMDeviceFlushState { |
421 | uint64_t continue_token; | |
422 | int64_t hcall_ret; | |
423 | uint32_t drcidx; | |
424 | ||
425 | QLIST_ENTRY(SpaprNVDIMMDeviceFlushState) node; | |
426 | } SpaprNVDIMMDeviceFlushState; | |
427 | ||
428 | typedef struct SpaprNVDIMMDevice SpaprNVDIMMDevice; | |
429 | struct SpaprNVDIMMDevice { | |
8601b4f1 | 430 | /* private */ |
b5513584 SB |
431 | NVDIMMDevice parent_obj; |
432 | ||
8601b4f1 | 433 | bool hcall_flush_required; |
b5513584 SB |
434 | uint64_t nvdimm_flush_token; |
435 | QLIST_HEAD(, SpaprNVDIMMDeviceFlushState) pending_nvdimm_flush_states; | |
436 | QLIST_HEAD(, SpaprNVDIMMDeviceFlushState) completed_nvdimm_flush_states; | |
8601b4f1 SB |
437 | |
438 | /* public */ | |
439 | ||
440 | /* | |
441 | * The 'on' value for this property forced the qemu to enable the hcall | |
442 | * flush for the nvdimm device even if the backend is a pmem | |
443 | */ | |
444 | bool pmem_override; | |
b5513584 SB |
445 | }; |
446 | ||
447 | static int flush_worker_cb(void *opaque) | |
448 | { | |
449 | SpaprNVDIMMDeviceFlushState *state = opaque; | |
450 | SpaprDrc *drc = spapr_drc_by_index(state->drcidx); | |
edccf661 DHB |
451 | PCDIMMDevice *dimm; |
452 | HostMemoryBackend *backend; | |
453 | int backend_fd; | |
454 | ||
455 | g_assert(drc != NULL); | |
456 | ||
457 | dimm = PC_DIMM(drc->dev); | |
458 | backend = MEMORY_BACKEND(dimm->hostmem); | |
459 | backend_fd = memory_region_get_fd(&backend->mr); | |
b5513584 SB |
460 | |
461 | if (object_property_get_bool(OBJECT(backend), "pmem", NULL)) { | |
462 | MemoryRegion *mr = host_memory_backend_get_memory(dimm->hostmem); | |
463 | void *ptr = memory_region_get_ram_ptr(mr); | |
464 | size_t size = object_property_get_uint(OBJECT(dimm), PC_DIMM_SIZE_PROP, | |
465 | NULL); | |
466 | ||
467 | /* flush pmem backend */ | |
468 | pmem_persist(ptr, size); | |
469 | } else { | |
470 | /* flush raw backing image */ | |
471 | if (qemu_fdatasync(backend_fd) < 0) { | |
472 | error_report("papr_scm: Could not sync nvdimm to backend file: %s", | |
473 | strerror(errno)); | |
474 | return H_HARDWARE; | |
475 | } | |
476 | } | |
477 | ||
478 | return H_SUCCESS; | |
479 | } | |
480 | ||
481 | static void spapr_nvdimm_flush_completion_cb(void *opaque, int hcall_ret) | |
482 | { | |
483 | SpaprNVDIMMDeviceFlushState *state = opaque; | |
484 | SpaprDrc *drc = spapr_drc_by_index(state->drcidx); | |
edccf661 DHB |
485 | SpaprNVDIMMDevice *s_nvdimm; |
486 | ||
487 | g_assert(drc != NULL); | |
488 | ||
489 | s_nvdimm = SPAPR_NVDIMM(drc->dev); | |
b5513584 SB |
490 | |
491 | state->hcall_ret = hcall_ret; | |
492 | QLIST_REMOVE(state, node); | |
493 | QLIST_INSERT_HEAD(&s_nvdimm->completed_nvdimm_flush_states, state, node); | |
494 | } | |
495 | ||
496 | static int spapr_nvdimm_flush_post_load(void *opaque, int version_id) | |
497 | { | |
498 | SpaprNVDIMMDevice *s_nvdimm = (SpaprNVDIMMDevice *)opaque; | |
499 | SpaprNVDIMMDeviceFlushState *state; | |
8601b4f1 SB |
500 | HostMemoryBackend *backend = MEMORY_BACKEND(PC_DIMM(s_nvdimm)->hostmem); |
501 | bool is_pmem = object_property_get_bool(OBJECT(backend), "pmem", NULL); | |
502 | bool pmem_override = object_property_get_bool(OBJECT(s_nvdimm), | |
503 | "pmem-override", NULL); | |
504 | bool dest_hcall_flush_required = pmem_override || !is_pmem; | |
505 | ||
506 | if (!s_nvdimm->hcall_flush_required && dest_hcall_flush_required) { | |
507 | error_report("The file backend for the spapr-nvdimm device %s at " | |
508 | "source is a pmem, use pmem=on and pmem-override=off to " | |
509 | "continue.", DEVICE(s_nvdimm)->id); | |
510 | return -EINVAL; | |
511 | } | |
512 | if (s_nvdimm->hcall_flush_required && !dest_hcall_flush_required) { | |
513 | error_report("The guest expects hcall-flush support for the " | |
514 | "spapr-nvdimm device %s, use pmem_override=on to " | |
515 | "continue.", DEVICE(s_nvdimm)->id); | |
516 | return -EINVAL; | |
517 | } | |
b5513584 SB |
518 | |
519 | QLIST_FOREACH(state, &s_nvdimm->pending_nvdimm_flush_states, node) { | |
aef04fc7 | 520 | thread_pool_submit_aio(flush_worker_cb, state, |
b5513584 SB |
521 | spapr_nvdimm_flush_completion_cb, state); |
522 | } | |
523 | ||
524 | return 0; | |
525 | } | |
526 | ||
527 | static const VMStateDescription vmstate_spapr_nvdimm_flush_state = { | |
528 | .name = "spapr_nvdimm_flush_state", | |
529 | .version_id = 1, | |
530 | .minimum_version_id = 1, | |
078ddbc9 | 531 | .fields = (const VMStateField[]) { |
b5513584 SB |
532 | VMSTATE_UINT64(continue_token, SpaprNVDIMMDeviceFlushState), |
533 | VMSTATE_INT64(hcall_ret, SpaprNVDIMMDeviceFlushState), | |
534 | VMSTATE_UINT32(drcidx, SpaprNVDIMMDeviceFlushState), | |
535 | VMSTATE_END_OF_LIST() | |
536 | }, | |
537 | }; | |
538 | ||
539 | const VMStateDescription vmstate_spapr_nvdimm_states = { | |
540 | .name = "spapr_nvdimm_states", | |
541 | .version_id = 1, | |
542 | .minimum_version_id = 1, | |
543 | .post_load = spapr_nvdimm_flush_post_load, | |
078ddbc9 | 544 | .fields = (const VMStateField[]) { |
8601b4f1 | 545 | VMSTATE_BOOL(hcall_flush_required, SpaprNVDIMMDevice), |
b5513584 SB |
546 | VMSTATE_UINT64(nvdimm_flush_token, SpaprNVDIMMDevice), |
547 | VMSTATE_QLIST_V(completed_nvdimm_flush_states, SpaprNVDIMMDevice, 1, | |
548 | vmstate_spapr_nvdimm_flush_state, | |
549 | SpaprNVDIMMDeviceFlushState, node), | |
550 | VMSTATE_QLIST_V(pending_nvdimm_flush_states, SpaprNVDIMMDevice, 1, | |
551 | vmstate_spapr_nvdimm_flush_state, | |
552 | SpaprNVDIMMDeviceFlushState, node), | |
553 | VMSTATE_END_OF_LIST() | |
554 | }, | |
555 | }; | |
556 | ||
557 | /* | |
558 | * Assign a token and reserve it for the new flush state. | |
559 | */ | |
560 | static SpaprNVDIMMDeviceFlushState *spapr_nvdimm_init_new_flush_state( | |
561 | SpaprNVDIMMDevice *spapr_nvdimm) | |
562 | { | |
563 | SpaprNVDIMMDeviceFlushState *state; | |
564 | ||
565 | state = g_malloc0(sizeof(*state)); | |
566 | ||
567 | spapr_nvdimm->nvdimm_flush_token++; | |
568 | /* Token zero is presumed as no job pending. Assert on overflow to zero */ | |
569 | g_assert(spapr_nvdimm->nvdimm_flush_token != 0); | |
570 | ||
571 | state->continue_token = spapr_nvdimm->nvdimm_flush_token; | |
572 | ||
573 | QLIST_INSERT_HEAD(&spapr_nvdimm->pending_nvdimm_flush_states, state, node); | |
574 | ||
575 | return state; | |
576 | } | |
577 | ||
578 | /* | |
579 | * spapr_nvdimm_finish_flushes | |
580 | * Waits for all pending flush requests to complete | |
581 | * their execution and free the states | |
582 | */ | |
583 | void spapr_nvdimm_finish_flushes(void) | |
584 | { | |
585 | SpaprNVDIMMDeviceFlushState *state, *next; | |
586 | GSList *list, *nvdimms; | |
587 | ||
588 | /* | |
589 | * Called on reset path, the main loop thread which calls | |
590 | * the pending BHs has gotten out running in the reset path, | |
591 | * finally reaching here. Other code path being guest | |
e6a19a64 | 592 | * h_client_architecture_support, that's early boot up. |
b5513584 SB |
593 | */ |
594 | nvdimms = nvdimm_get_device_list(); | |
595 | for (list = nvdimms; list; list = list->next) { | |
596 | NVDIMMDevice *nvdimm = list->data; | |
597 | if (object_dynamic_cast(OBJECT(nvdimm), TYPE_SPAPR_NVDIMM)) { | |
598 | SpaprNVDIMMDevice *s_nvdimm = SPAPR_NVDIMM(nvdimm); | |
599 | while (!QLIST_EMPTY(&s_nvdimm->pending_nvdimm_flush_states)) { | |
600 | aio_poll(qemu_get_aio_context(), true); | |
601 | } | |
602 | ||
603 | QLIST_FOREACH_SAFE(state, &s_nvdimm->completed_nvdimm_flush_states, | |
604 | node, next) { | |
605 | QLIST_REMOVE(state, node); | |
606 | g_free(state); | |
607 | } | |
608 | } | |
609 | } | |
610 | g_slist_free(nvdimms); | |
611 | } | |
612 | ||
613 | /* | |
614 | * spapr_nvdimm_get_flush_status | |
615 | * Fetches the status of the hcall worker and returns | |
616 | * H_LONG_BUSY_ORDER_10_MSEC if the worker is still running. | |
617 | */ | |
618 | static int spapr_nvdimm_get_flush_status(SpaprNVDIMMDevice *s_nvdimm, | |
619 | uint64_t token) | |
620 | { | |
621 | SpaprNVDIMMDeviceFlushState *state, *node; | |
622 | ||
623 | QLIST_FOREACH(state, &s_nvdimm->pending_nvdimm_flush_states, node) { | |
624 | if (state->continue_token == token) { | |
625 | return H_LONG_BUSY_ORDER_10_MSEC; | |
626 | } | |
627 | } | |
628 | ||
629 | QLIST_FOREACH_SAFE(state, &s_nvdimm->completed_nvdimm_flush_states, | |
630 | node, node) { | |
631 | if (state->continue_token == token) { | |
632 | int ret = state->hcall_ret; | |
633 | QLIST_REMOVE(state, node); | |
634 | g_free(state); | |
635 | return ret; | |
636 | } | |
637 | } | |
638 | ||
639 | /* If not found in complete list too, invalid token */ | |
640 | return H_P2; | |
641 | } | |
642 | ||
643 | /* | |
644 | * H_SCM_FLUSH | |
645 | * Input: drc_index, continue-token | |
646 | * Out: continue-token | |
647 | * Return Value: H_SUCCESS, H_Parameter, H_P2, H_LONG_BUSY_ORDER_10_MSEC, | |
648 | * H_UNSUPPORTED | |
649 | * | |
650 | * Given a DRC Index Flush the data to backend NVDIMM device. The hcall returns | |
651 | * H_LONG_BUSY_ORDER_10_MSEC when the flush takes longer time and the hcall | |
652 | * needs to be issued multiple times in order to be completely serviced. The | |
653 | * continue-token from the output to be passed in the argument list of | |
654 | * subsequent hcalls until the hcall is completely serviced at which point | |
655 | * H_SUCCESS or other error is returned. | |
656 | */ | |
657 | static target_ulong h_scm_flush(PowerPCCPU *cpu, SpaprMachineState *spapr, | |
658 | target_ulong opcode, target_ulong *args) | |
659 | { | |
660 | int ret; | |
661 | uint32_t drc_index = args[0]; | |
662 | uint64_t continue_token = args[1]; | |
663 | SpaprDrc *drc = spapr_drc_by_index(drc_index); | |
664 | PCDIMMDevice *dimm; | |
665 | HostMemoryBackend *backend = NULL; | |
666 | SpaprNVDIMMDeviceFlushState *state; | |
b5513584 SB |
667 | int fd; |
668 | ||
669 | if (!drc || !drc->dev || | |
670 | spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) { | |
671 | return H_PARAMETER; | |
672 | } | |
673 | ||
674 | dimm = PC_DIMM(drc->dev); | |
8601b4f1 SB |
675 | if (!object_dynamic_cast(OBJECT(dimm), TYPE_SPAPR_NVDIMM)) { |
676 | return H_PARAMETER; | |
677 | } | |
b5513584 | 678 | if (continue_token == 0) { |
8601b4f1 | 679 | bool is_pmem = false, pmem_override = false; |
b5513584 SB |
680 | backend = MEMORY_BACKEND(dimm->hostmem); |
681 | fd = memory_region_get_fd(&backend->mr); | |
682 | ||
683 | if (fd < 0) { | |
684 | return H_UNSUPPORTED; | |
685 | } | |
686 | ||
8601b4f1 SB |
687 | is_pmem = object_property_get_bool(OBJECT(backend), "pmem", NULL); |
688 | pmem_override = object_property_get_bool(OBJECT(dimm), | |
689 | "pmem-override", NULL); | |
690 | if (is_pmem && !pmem_override) { | |
691 | return H_UNSUPPORTED; | |
692 | } | |
693 | ||
b5513584 SB |
694 | state = spapr_nvdimm_init_new_flush_state(SPAPR_NVDIMM(dimm)); |
695 | if (!state) { | |
696 | return H_HARDWARE; | |
697 | } | |
698 | ||
699 | state->drcidx = drc_index; | |
700 | ||
aef04fc7 | 701 | thread_pool_submit_aio(flush_worker_cb, state, |
b5513584 SB |
702 | spapr_nvdimm_flush_completion_cb, state); |
703 | ||
704 | continue_token = state->continue_token; | |
705 | } | |
706 | ||
707 | ret = spapr_nvdimm_get_flush_status(SPAPR_NVDIMM(dimm), continue_token); | |
708 | if (H_IS_LONG_BUSY(ret)) { | |
709 | args[0] = continue_token; | |
710 | } | |
711 | ||
712 | return ret; | |
713 | } | |
714 | ||
b5fca656 SB |
715 | static target_ulong h_scm_unbind_mem(PowerPCCPU *cpu, SpaprMachineState *spapr, |
716 | target_ulong opcode, target_ulong *args) | |
717 | { | |
718 | uint32_t drc_index = args[0]; | |
719 | uint64_t starting_scm_logical_addr = args[1]; | |
720 | uint64_t no_of_scm_blocks_to_unbind = args[2]; | |
721 | uint64_t continue_token = args[3]; | |
722 | uint64_t size_to_unbind; | |
723 | Range blockrange = range_empty; | |
724 | Range nvdimmrange = range_empty; | |
725 | SpaprDrc *drc = spapr_drc_by_index(drc_index); | |
726 | NVDIMMDevice *nvdimm; | |
727 | uint64_t size, addr; | |
728 | ||
729 | if (!drc || !drc->dev || | |
730 | spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) { | |
731 | return H_PARAMETER; | |
732 | } | |
733 | ||
734 | /* continue_token should be zero as this hcall doesn't return H_BUSY. */ | |
735 | if (continue_token > 0) { | |
736 | return H_P4; | |
737 | } | |
738 | ||
739 | /* Check if starting_scm_logical_addr is block aligned */ | |
740 | if (!QEMU_IS_ALIGNED(starting_scm_logical_addr, | |
741 | SPAPR_MINIMUM_SCM_BLOCK_SIZE)) { | |
742 | return H_P2; | |
743 | } | |
744 | ||
745 | size_to_unbind = no_of_scm_blocks_to_unbind * SPAPR_MINIMUM_SCM_BLOCK_SIZE; | |
746 | if (no_of_scm_blocks_to_unbind == 0 || no_of_scm_blocks_to_unbind != | |
747 | size_to_unbind / SPAPR_MINIMUM_SCM_BLOCK_SIZE) { | |
748 | return H_P3; | |
749 | } | |
750 | ||
751 | nvdimm = NVDIMM(drc->dev); | |
752 | size = object_property_get_int(OBJECT(nvdimm), PC_DIMM_SIZE_PROP, | |
753 | &error_abort); | |
754 | addr = object_property_get_int(OBJECT(nvdimm), PC_DIMM_ADDR_PROP, | |
755 | &error_abort); | |
756 | ||
757 | range_init_nofail(&nvdimmrange, addr, size); | |
758 | range_init_nofail(&blockrange, starting_scm_logical_addr, size_to_unbind); | |
759 | ||
760 | if (!range_contains_range(&nvdimmrange, &blockrange)) { | |
761 | return H_P3; | |
762 | } | |
763 | ||
764 | args[1] = no_of_scm_blocks_to_unbind; | |
765 | ||
766 | /* let unplug take care of actual unbind */ | |
767 | return H_SUCCESS; | |
768 | } | |
769 | ||
770 | #define H_UNBIND_SCOPE_ALL 0x1 | |
771 | #define H_UNBIND_SCOPE_DRC 0x2 | |
772 | ||
773 | static target_ulong h_scm_unbind_all(PowerPCCPU *cpu, SpaprMachineState *spapr, | |
774 | target_ulong opcode, target_ulong *args) | |
775 | { | |
776 | uint64_t target_scope = args[0]; | |
777 | uint32_t drc_index = args[1]; | |
778 | uint64_t continue_token = args[2]; | |
779 | NVDIMMDevice *nvdimm; | |
780 | uint64_t size; | |
781 | uint64_t no_of_scm_blocks_unbound = 0; | |
782 | ||
783 | /* continue_token should be zero as this hcall doesn't return H_BUSY. */ | |
784 | if (continue_token > 0) { | |
785 | return H_P4; | |
786 | } | |
787 | ||
788 | if (target_scope == H_UNBIND_SCOPE_DRC) { | |
789 | SpaprDrc *drc = spapr_drc_by_index(drc_index); | |
790 | ||
791 | if (!drc || !drc->dev || | |
792 | spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) { | |
793 | return H_P2; | |
794 | } | |
795 | ||
796 | nvdimm = NVDIMM(drc->dev); | |
797 | size = object_property_get_int(OBJECT(nvdimm), PC_DIMM_SIZE_PROP, | |
798 | &error_abort); | |
799 | ||
800 | no_of_scm_blocks_unbound = size / SPAPR_MINIMUM_SCM_BLOCK_SIZE; | |
801 | } else if (target_scope == H_UNBIND_SCOPE_ALL) { | |
802 | GSList *list, *nvdimms; | |
803 | ||
804 | nvdimms = nvdimm_get_device_list(); | |
805 | for (list = nvdimms; list; list = list->next) { | |
806 | nvdimm = list->data; | |
807 | size = object_property_get_int(OBJECT(nvdimm), PC_DIMM_SIZE_PROP, | |
808 | &error_abort); | |
809 | ||
810 | no_of_scm_blocks_unbound += size / SPAPR_MINIMUM_SCM_BLOCK_SIZE; | |
811 | } | |
812 | g_slist_free(nvdimms); | |
813 | } else { | |
814 | return H_PARAMETER; | |
815 | } | |
816 | ||
817 | args[1] = no_of_scm_blocks_unbound; | |
818 | ||
819 | /* let unplug take care of actual unbind */ | |
820 | return H_SUCCESS; | |
821 | } | |
822 | ||
53d7d7e2 VJ |
823 | static target_ulong h_scm_health(PowerPCCPU *cpu, SpaprMachineState *spapr, |
824 | target_ulong opcode, target_ulong *args) | |
825 | { | |
826 | ||
827 | NVDIMMDevice *nvdimm; | |
828 | uint64_t hbitmap = 0; | |
829 | uint32_t drc_index = args[0]; | |
830 | SpaprDrc *drc = spapr_drc_by_index(drc_index); | |
831 | const uint64_t hbitmap_mask = PAPR_PMEM_UNARMED; | |
832 | ||
833 | ||
834 | /* Ensure that the drc is valid & is valid PMEM dimm and is plugged in */ | |
835 | if (!drc || !drc->dev || | |
836 | spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) { | |
837 | return H_PARAMETER; | |
838 | } | |
839 | ||
840 | nvdimm = NVDIMM(drc->dev); | |
841 | ||
842 | /* Update if the nvdimm is unarmed and send its status via health bitmaps */ | |
843 | if (object_property_get_bool(OBJECT(nvdimm), NVDIMM_UNARMED_PROP, NULL)) { | |
844 | hbitmap |= PAPR_PMEM_UNARMED; | |
845 | } | |
846 | ||
847 | /* Update the out args with health bitmap/mask */ | |
848 | args[0] = hbitmap; | |
849 | args[1] = hbitmap_mask; | |
850 | ||
851 | return H_SUCCESS; | |
852 | } | |
853 | ||
b5fca656 SB |
854 | static void spapr_scm_register_types(void) |
855 | { | |
856 | /* qemu/scm specific hcalls */ | |
857 | spapr_register_hypercall(H_SCM_READ_METADATA, h_scm_read_metadata); | |
858 | spapr_register_hypercall(H_SCM_WRITE_METADATA, h_scm_write_metadata); | |
859 | spapr_register_hypercall(H_SCM_BIND_MEM, h_scm_bind_mem); | |
860 | spapr_register_hypercall(H_SCM_UNBIND_MEM, h_scm_unbind_mem); | |
861 | spapr_register_hypercall(H_SCM_UNBIND_ALL, h_scm_unbind_all); | |
53d7d7e2 | 862 | spapr_register_hypercall(H_SCM_HEALTH, h_scm_health); |
b5513584 | 863 | spapr_register_hypercall(H_SCM_FLUSH, h_scm_flush); |
b5fca656 SB |
864 | } |
865 | ||
866 | type_init(spapr_scm_register_types) | |
8601b4f1 SB |
867 | |
868 | static void spapr_nvdimm_realize(NVDIMMDevice *dimm, Error **errp) | |
869 | { | |
870 | SpaprNVDIMMDevice *s_nvdimm = SPAPR_NVDIMM(dimm); | |
871 | HostMemoryBackend *backend = MEMORY_BACKEND(PC_DIMM(dimm)->hostmem); | |
872 | bool is_pmem = object_property_get_bool(OBJECT(backend), "pmem", NULL); | |
873 | bool pmem_override = object_property_get_bool(OBJECT(dimm), "pmem-override", | |
874 | NULL); | |
875 | if (!is_pmem || pmem_override) { | |
876 | s_nvdimm->hcall_flush_required = true; | |
877 | } | |
878 | ||
99b16e8e | 879 | vmstate_register_any(NULL, &vmstate_spapr_nvdimm_states, dimm); |
8601b4f1 SB |
880 | } |
881 | ||
882 | static void spapr_nvdimm_unrealize(NVDIMMDevice *dimm) | |
883 | { | |
884 | vmstate_unregister(NULL, &vmstate_spapr_nvdimm_states, dimm); | |
885 | } | |
886 | ||
887 | static Property spapr_nvdimm_properties[] = { | |
888 | #ifdef CONFIG_LIBPMEM | |
889 | DEFINE_PROP_BOOL("pmem-override", SpaprNVDIMMDevice, pmem_override, false), | |
890 | #endif | |
891 | DEFINE_PROP_END_OF_LIST(), | |
892 | }; | |
893 | ||
894 | static void spapr_nvdimm_class_init(ObjectClass *oc, void *data) | |
895 | { | |
896 | DeviceClass *dc = DEVICE_CLASS(oc); | |
897 | NVDIMMClass *nvc = NVDIMM_CLASS(oc); | |
898 | ||
899 | nvc->realize = spapr_nvdimm_realize; | |
900 | nvc->unrealize = spapr_nvdimm_unrealize; | |
901 | ||
902 | device_class_set_props(dc, spapr_nvdimm_properties); | |
903 | } | |
904 | ||
905 | static void spapr_nvdimm_init(Object *obj) | |
906 | { | |
907 | SpaprNVDIMMDevice *s_nvdimm = SPAPR_NVDIMM(obj); | |
908 | ||
909 | s_nvdimm->hcall_flush_required = false; | |
910 | QLIST_INIT(&s_nvdimm->pending_nvdimm_flush_states); | |
911 | QLIST_INIT(&s_nvdimm->completed_nvdimm_flush_states); | |
912 | } | |
913 | ||
914 | static TypeInfo spapr_nvdimm_info = { | |
915 | .name = TYPE_SPAPR_NVDIMM, | |
916 | .parent = TYPE_NVDIMM, | |
917 | .class_init = spapr_nvdimm_class_init, | |
918 | .class_size = sizeof(SPAPRNVDIMMClass), | |
919 | .instance_size = sizeof(SpaprNVDIMMDevice), | |
920 | .instance_init = spapr_nvdimm_init, | |
921 | }; | |
922 | ||
923 | static void spapr_nvdimm_register_types(void) | |
924 | { | |
925 | type_register_static(&spapr_nvdimm_info); | |
926 | } | |
927 | ||
928 | type_init(spapr_nvdimm_register_types) |