]> git.proxmox.com Git - mirror_qemu.git/blame - hw/ppc/spapr.c
target/ppc/tcg: make spapr_caps apply cap-[cfpc/sbbc/ibs] non-fatal for tcg
[mirror_qemu.git] / hw / ppc / spapr.c
CommitLineData
9fdf0c29
DG
1/*
2 * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator
3 *
4 * Copyright (c) 2004-2007 Fabrice Bellard
5 * Copyright (c) 2007 Jocelyn Mayer
6 * Copyright (c) 2010 David Gibson, IBM Corporation.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a copy
9 * of this software and associated documentation files (the "Software"), to deal
10 * in the Software without restriction, including without limitation the rights
11 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 * copies of the Software, and to permit persons to whom the Software is
13 * furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included in
16 * all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 * THE SOFTWARE.
25 *
26 */
0d75590d 27#include "qemu/osdep.h"
da34e65c 28#include "qapi/error.h"
fa98fbfc 29#include "qapi/visitor.h"
9c17d615 30#include "sysemu/sysemu.h"
e35704ba 31#include "sysemu/numa.h"
23ff81bd 32#include "sysemu/qtest.h"
83c9f4ca 33#include "hw/hw.h"
03dd024f 34#include "qemu/log.h"
71461b0f 35#include "hw/fw-path-provider.h"
9fdf0c29 36#include "elf.h"
1422e32d 37#include "net/net.h"
ad440b4a 38#include "sysemu/device_tree.h"
9c17d615 39#include "sysemu/cpus.h"
b3946626 40#include "sysemu/hw_accel.h"
e97c3636 41#include "kvm_ppc.h"
c4b63b7c 42#include "migration/misc.h"
84a899de 43#include "migration/global_state.h"
f2a8f0a6 44#include "migration/register.h"
4be21d56 45#include "mmu-hash64.h"
b4db5413 46#include "mmu-book3s-v3.h"
7abd43ba 47#include "cpu-models.h"
3794d548 48#include "qom/cpu.h"
9fdf0c29
DG
49
50#include "hw/boards.h"
0d09e41a 51#include "hw/ppc/ppc.h"
9fdf0c29
DG
52#include "hw/loader.h"
53
7804c353 54#include "hw/ppc/fdt.h"
0d09e41a
PB
55#include "hw/ppc/spapr.h"
56#include "hw/ppc/spapr_vio.h"
57#include "hw/pci-host/spapr.h"
a2cb15b0 58#include "hw/pci/msi.h"
9fdf0c29 59
83c9f4ca 60#include "hw/pci/pci.h"
71461b0f
AK
61#include "hw/scsi/scsi.h"
62#include "hw/virtio/virtio-scsi.h"
c4e13492 63#include "hw/virtio/vhost-scsi-common.h"
f61b4bed 64
022c62cb 65#include "exec/address-spaces.h"
2309832a 66#include "exec/ram_addr.h"
35139a59 67#include "hw/usb.h"
1de7afc9 68#include "qemu/config-file.h"
135a129a 69#include "qemu/error-report.h"
2a6593cb 70#include "trace.h"
34316482 71#include "hw/nmi.h"
6449da45 72#include "hw/intc/intc.h"
890c2b77 73
f348b6d1 74#include "qemu/cutils.h"
94a94e4c 75#include "hw/ppc/spapr_cpu_core.h"
2cc0e2e8 76#include "hw/mem/memory-device.h"
68a27b20 77
9fdf0c29
DG
78#include <libfdt.h>
79
4d8d5467
BH
80/* SLOF memory layout:
81 *
82 * SLOF raw image loaded at 0, copies its romfs right below the flat
83 * device-tree, then position SLOF itself 31M below that
84 *
85 * So we set FW_OVERHEAD to 40MB which should account for all of that
86 * and more
87 *
88 * We load our kernel at 4M, leaving space for SLOF initial image
89 */
38b02bd8 90#define FDT_MAX_SIZE 0x100000
39ac8455 91#define RTAS_MAX_SIZE 0x10000
b7d1f77a 92#define RTAS_MAX_ADDR 0x80000000 /* RTAS must stay below that */
a9f8ad8f
DG
93#define FW_MAX_SIZE 0x400000
94#define FW_FILE_NAME "slof.bin"
4d8d5467
BH
95#define FW_OVERHEAD 0x2800000
96#define KERNEL_LOAD_ADDR FW_MAX_SIZE
a9f8ad8f 97
4d8d5467 98#define MIN_RMA_SLOF 128UL
9fdf0c29 99
5c7adcf4 100#define PHANDLE_INTC 0x00001111
0c103f8e 101
5d0fb150
GK
102/* These two functions implement the VCPU id numbering: one to compute them
103 * all and one to identify thread 0 of a VCORE. Any change to the first one
104 * is likely to have an impact on the second one, so let's keep them close.
105 */
106static int spapr_vcpu_id(sPAPRMachineState *spapr, int cpu_index)
107{
1a5008fc 108 assert(spapr->vsmt);
5d0fb150
GK
109 return
110 (cpu_index / smp_threads) * spapr->vsmt + cpu_index % smp_threads;
111}
112static bool spapr_is_thread0_in_vcore(sPAPRMachineState *spapr,
113 PowerPCCPU *cpu)
114{
1a5008fc 115 assert(spapr->vsmt);
5d0fb150
GK
116 return spapr_get_vcpu_id(cpu) % spapr->vsmt == 0;
117}
118
46f7afa3
GK
/* .needed callback for the dummy ICP vmstate: never send it. */
static bool pre_2_10_vmstate_dummy_icp_needed(void *opaque)
{
    /*
     * Dummy entries correspond to unused ICPState objects in older QEMUs,
     * and newer QEMUs don't even have them. In both cases, we don't want
     * to send anything on the wire.
     */
    return false;
}
127
/*
 * Placeholder migration section for the "icp/server" instances that
 * pre-2.10 machine types used to register. The .needed hook always
 * returns false, so no payload is ever sent; the field list only
 * documents the on-the-wire layout of the old section.
 */
static const VMStateDescription pre_2_10_vmstate_dummy_icp = {
    .name = "icp/server",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = pre_2_10_vmstate_dummy_icp_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UNUSED(4), /* uint32_t xirr */
        VMSTATE_UNUSED(1), /* uint8_t pending_priority */
        VMSTATE_UNUSED(1), /* uint8_t mfrr */
        VMSTATE_END_OF_LIST()
    },
};
140
/*
 * Register a dummy "icp/server" vmstate section with instance id @i.
 * The id is also passed as the opaque so the matching unregister call
 * below can identify the entry.
 */
static void pre_2_10_vmstate_register_dummy_icp(int i)
{
    vmstate_register(NULL, i, &pre_2_10_vmstate_dummy_icp,
                     (void *)(uintptr_t) i);
}
146
/* Undo pre_2_10_vmstate_register_dummy_icp() for instance id @i. */
static void pre_2_10_vmstate_unregister_dummy_icp(int i)
{
    vmstate_unregister(NULL, &pre_2_10_vmstate_dummy_icp,
                       (void *)(uintptr_t) i);
}
152
/*
 * Upper bound on the VCPU ids (and hence interrupt server numbers) this
 * machine can hand out, derived from the vsmt spacing of ids.
 */
int spapr_max_server_number(sPAPRMachineState *spapr)
{
    /* vsmt must have been resolved before ids can be computed */
    assert(spapr->vsmt);
    return DIV_ROUND_UP(max_cpus * spapr->vsmt, smp_threads);
}
158
833d4668
AK
/*
 * Set the SMT-related properties on an existing CPU node of @fdt:
 * the optional "cpu-version" (only when the CPU runs in a compat mode)
 * plus the "ibm,ppc-interrupt-server#s" / "ibm,ppc-interrupt-gserver#s"
 * lists covering @smt_threads threads starting at the CPU's VCPU id.
 *
 * Returns 0 on success, a negative libfdt error code on failure.
 */
static int spapr_fixup_cpu_smt_dt(void *fdt, int offset, PowerPCCPU *cpu,
                                  int smt_threads)
{
    int i, ret = 0;
    uint32_t servers_prop[smt_threads];
    uint32_t gservers_prop[smt_threads * 2];
    int index = spapr_get_vcpu_id(cpu);

    if (cpu->compat_pvr) {
        ret = fdt_setprop_cell(fdt, offset, "cpu-version", cpu->compat_pvr);
        if (ret < 0) {
            return ret;
        }
    }

    /* Build interrupt servers and gservers properties */
    for (i = 0; i < smt_threads; i++) {
        servers_prop[i] = cpu_to_be32(index + i);
        /* Hack, direct the group queues back to cpu 0 */
        gservers_prop[i*2] = cpu_to_be32(index + i);
        gservers_prop[i*2 + 1] = 0;
    }
    ret = fdt_setprop(fdt, offset, "ibm,ppc-interrupt-server#s",
                      servers_prop, sizeof(servers_prop));
    if (ret < 0) {
        return ret;
    }
    ret = fdt_setprop(fdt, offset, "ibm,ppc-interrupt-gserver#s",
                      gservers_prop, sizeof(gservers_prop));

    return ret;
}
191
/*
 * Set the "ibm,associativity" property on a CPU node. Only the last two
 * cells of the 5-entry list are meaningful here: the NUMA node id and
 * the VCPU id.
 *
 * Returns 0 on success, a negative libfdt error code on failure.
 */
static int spapr_fixup_cpu_numa_dt(void *fdt, int offset, PowerPCCPU *cpu)
{
    int index = spapr_get_vcpu_id(cpu);
    uint32_t associativity[] = {cpu_to_be32(0x5),
                                cpu_to_be32(0x0),
                                cpu_to_be32(0x0),
                                cpu_to_be32(0x0),
                                cpu_to_be32(cpu->node_id),
                                cpu_to_be32(index)};

    /* Advertise NUMA via ibm,associativity */
    return fdt_setprop(fdt, offset, "ibm,associativity", associativity,
                       sizeof(associativity));
}
206
/* Populate the "ibm,pa-features" property */
static void spapr_populate_pa_features(sPAPRMachineState *spapr,
                                       PowerPCCPU *cpu,
                                       void *fdt, int offset,
                                       bool legacy_guest)
{
    /*
     * One bitmap template per supported ISA level; each starts with its
     * own length. The newest level the CPU is compatible with wins.
     */
    uint8_t pa_features_206[] = { 6, 0,
        0xf6, 0x1f, 0xc7, 0x00, 0x80, 0xc0 };
    uint8_t pa_features_207[] = { 24, 0,
        0xf6, 0x1f, 0xc7, 0xc0, 0x80, 0xf0,
        0x80, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x80, 0x00,
        0x80, 0x00, 0x80, 0x00, 0x00, 0x00 };
    uint8_t pa_features_300[] = { 66, 0,
        /* 0: MMU|FPU|SLB|RUN|DABR|NX, 1: fri[nzpm]|DABRX|SPRG3|SLB0|PP110 */
        /* 2: VPM|DS205|PPR|DS202|DS206, 3: LSD|URG, SSO, 5: LE|CFAR|EB|LSQ */
        0xf6, 0x1f, 0xc7, 0xc0, 0x80, 0xf0, /* 0 - 5 */
        /* 6: DS207 */
        0x80, 0x00, 0x00, 0x00, 0x00, 0x00, /* 6 - 11 */
        /* 16: Vector */
        0x00, 0x00, 0x00, 0x00, 0x80, 0x00, /* 12 - 17 */
        /* 18: Vec. Scalar, 20: Vec. XOR, 22: HTM */
        0x80, 0x00, 0x80, 0x00, 0x00, 0x00, /* 18 - 23 */
        /* 24: Ext. Dec, 26: 64 bit ftrs, 28: PM ftrs */
        0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 24 - 29 */
        /* 30: MMR, 32: LE atomic, 34: EBB + ext EBB */
        0x80, 0x00, 0x80, 0x00, 0xC0, 0x00, /* 30 - 35 */
        /* 36: SPR SO, 38: Copy/Paste, 40: Radix MMU */
        0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 36 - 41 */
        /* 42: PM, 44: PC RA, 46: SC vec'd */
        0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 42 - 47 */
        /* 48: SIMD, 50: QP BFP, 52: String */
        0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 48 - 53 */
        /* 54: DecFP, 56: DecI, 58: SHA */
        0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 54 - 59 */
        /* 60: NM atomic, 62: RNG */
        0x80, 0x00, 0x80, 0x00, 0x00, 0x00, /* 60 - 65 */
    };
    uint8_t *pa_features = NULL;
    size_t pa_size;

    /* Chained (not else'd) checks: the highest compatible level wins */
    if (ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_2_06, 0, cpu->compat_pvr)) {
        pa_features = pa_features_206;
        pa_size = sizeof(pa_features_206);
    }
    if (ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_2_07, 0, cpu->compat_pvr)) {
        pa_features = pa_features_207;
        pa_size = sizeof(pa_features_207);
    }
    if (ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0, cpu->compat_pvr)) {
        pa_features = pa_features_300;
        pa_size = sizeof(pa_features_300);
    }
    if (!pa_features) {
        /* No compatible ISA level: emit no property at all */
        return;
    }

    if (ppc_hash64_has(cpu, PPC_HASH64_CI_LARGEPAGE)) {
        /*
         * Note: we keep CI large pages off by default because a 64K capable
         * guest provisioned with large pages might otherwise try to map a qemu
         * framebuffer (or other kind of memory mapped PCI BAR) using 64K pages
         * even if that qemu runs on a 4k host.
         * We add this bit back here if we are confident this is not an issue
         */
        pa_features[3] |= 0x20;
    }
    if ((spapr_get_cap(spapr, SPAPR_CAP_HTM) != 0) && pa_size > 24) {
        pa_features[24] |= 0x80; /* Transactional memory support */
    }
    if (legacy_guest && pa_size > 40) {
        /* Workaround for broken kernels that attempt (guest) radix
         * mode when they can't handle it, if they see the radix bit set
         * in pa-features. So hide it from them. */
        pa_features[40 + 2] &= ~0x80; /* Radix MMU */
    }

    _FDT((fdt_setprop(fdt, offset, "ibm,pa-features", pa_features, pa_size)));
}
286
/*
 * (Re)build the per-vcore CPU nodes of @fdt and refresh the properties
 * that depend on the current configuration: hash table size, NUMA
 * placement, SMT lists and pa-features. The cas_legacy_guest_workaround
 * flag suggests this runs on the CAS-updated tree — confirm at callers.
 *
 * Returns 0 on success, a negative libfdt error code on failure.
 */
static int spapr_fixup_cpu_dt(void *fdt, sPAPRMachineState *spapr)
{
    int ret = 0, offset, cpus_offset;
    CPUState *cs;
    char cpu_model[32];
    uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};

    CPU_FOREACH(cs) {
        PowerPCCPU *cpu = POWERPC_CPU(cs);
        DeviceClass *dc = DEVICE_GET_CLASS(cs);
        int index = spapr_get_vcpu_id(cpu);
        int compat_smt = MIN(smp_threads, ppc_compat_max_vthreads(cpu));

        /* One node per virtual core: skip secondary threads */
        if (!spapr_is_thread0_in_vcore(spapr, cpu)) {
            continue;
        }

        snprintf(cpu_model, 32, "%s@%x", dc->fw_name, index);

        /* Create /cpus if it does not exist yet */
        cpus_offset = fdt_path_offset(fdt, "/cpus");
        if (cpus_offset < 0) {
            cpus_offset = fdt_add_subnode(fdt, 0, "cpus");
            if (cpus_offset < 0) {
                return cpus_offset;
            }
        }
        /* Likewise for the CPU node itself */
        offset = fdt_subnode_offset(fdt, cpus_offset, cpu_model);
        if (offset < 0) {
            offset = fdt_add_subnode(fdt, cpus_offset, cpu_model);
            if (offset < 0) {
                return offset;
            }
        }

        ret = fdt_setprop(fdt, offset, "ibm,pft-size",
                          pft_size_prop, sizeof(pft_size_prop));
        if (ret < 0) {
            return ret;
        }

        if (nb_numa_nodes > 1) {
            ret = spapr_fixup_cpu_numa_dt(fdt, offset, cpu);
            if (ret < 0) {
                return ret;
            }
        }

        ret = spapr_fixup_cpu_smt_dt(fdt, offset, cpu, compat_smt);
        if (ret < 0) {
            return ret;
        }

        spapr_populate_pa_features(spapr, cpu, fdt, offset,
                                   spapr->cas_legacy_guest_workaround);
    }
    return ret;
}
344
c86c1aff 345static hwaddr spapr_node0_size(MachineState *machine)
b082d65a
AK
346{
347 if (nb_numa_nodes) {
348 int i;
349 for (i = 0; i < nb_numa_nodes; ++i) {
350 if (numa_info[i].node_mem) {
fb164994
DG
351 return MIN(pow2floor(numa_info[i].node_mem),
352 machine->ram_size);
b082d65a
AK
353 }
354 }
355 }
fb164994 356 return machine->ram_size;
b082d65a
AK
357}
358
a1d59c0f
AK
/*
 * Append @s1 to @s *including* its terminating NUL (the "+ 1" is
 * deliberate) — presumably to build NUL-separated device tree string
 * lists; verify against callers.
 */
static void add_str(GString *s, const gchar *s1)
{
    g_string_append_len(s, s1, strlen(s1) + 1);
}
7f763a5d 363
03d196b7 364static int spapr_populate_memory_node(void *fdt, int nodeid, hwaddr start,
26a8c353
AK
365 hwaddr size)
366{
367 uint32_t associativity[] = {
368 cpu_to_be32(0x4), /* length */
369 cpu_to_be32(0x0), cpu_to_be32(0x0),
c3b4f589 370 cpu_to_be32(0x0), cpu_to_be32(nodeid)
26a8c353
AK
371 };
372 char mem_name[32];
373 uint64_t mem_reg_property[2];
374 int off;
375
376 mem_reg_property[0] = cpu_to_be64(start);
377 mem_reg_property[1] = cpu_to_be64(size);
378
379 sprintf(mem_name, "memory@" TARGET_FMT_lx, start);
380 off = fdt_add_subnode(fdt, 0, mem_name);
381 _FDT(off);
382 _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
383 _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
384 sizeof(mem_reg_property))));
385 _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
386 sizeof(associativity))));
03d196b7 387 return off;
26a8c353
AK
388}
389
/*
 * Emit "memory@..." nodes for all of RAM: the RMA first, then the rest
 * of each NUMA node split into power-of-two chunks no bigger than the
 * alignment of their base address (so base/size stay consistent).
 *
 * Always returns 0; libfdt failures abort via _FDT() in the helper.
 */
static int spapr_populate_memory(sPAPRMachineState *spapr, void *fdt)
{
    MachineState *machine = MACHINE(spapr);
    hwaddr mem_start, node_size;
    int i, nb_nodes = nb_numa_nodes;
    NodeInfo *nodes = numa_info;
    NodeInfo ramnode;

    /* No NUMA nodes, assume there is just one node with whole RAM */
    if (!nb_numa_nodes) {
        nb_nodes = 1;
        ramnode.node_mem = machine->ram_size;
        nodes = &ramnode;
    }

    for (i = 0, mem_start = 0; i < nb_nodes; ++i) {
        if (!nodes[i].node_mem) {
            continue;
        }
        /* Clamp the node to the RAM that is actually left */
        if (mem_start >= machine->ram_size) {
            node_size = 0;
        } else {
            node_size = nodes[i].node_mem;
            if (node_size > machine->ram_size - mem_start) {
                node_size = machine->ram_size - mem_start;
            }
        }
        if (!mem_start) {
            /* spapr_machine_init() checks for rma_size <= node0_size
             * already */
            spapr_populate_memory_node(fdt, i, 0, spapr->rma_size);
            mem_start += spapr->rma_size;
            node_size -= spapr->rma_size;
        }
        for ( ; node_size; ) {
            hwaddr sizetmp = pow2floor(node_size);

            /* mem_start != 0 here */
            if (ctzl(mem_start) < ctzl(sizetmp)) {
                /* Chunk must not exceed the alignment of its base */
                sizetmp = 1ULL << ctzl(mem_start);
            }

            spapr_populate_memory_node(fdt, i, mem_start, sizetmp);
            node_size -= sizetmp;
            mem_start += sizetmp;
        }
    }

    return 0;
}
440
0da6f3fe
BR
441static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset,
442 sPAPRMachineState *spapr)
443{
444 PowerPCCPU *cpu = POWERPC_CPU(cs);
445 CPUPPCState *env = &cpu->env;
446 PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
14bb4486 447 int index = spapr_get_vcpu_id(cpu);
0da6f3fe
BR
448 uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
449 0xffffffff, 0xffffffff};
afd10a0f
BR
450 uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq()
451 : SPAPR_TIMEBASE_FREQ;
0da6f3fe
BR
452 uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000;
453 uint32_t page_sizes_prop[64];
454 size_t page_sizes_prop_size;
22419c2a 455 uint32_t vcpus_per_socket = smp_threads * smp_cores;
0da6f3fe 456 uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
abbc1247 457 int compat_smt = MIN(smp_threads, ppc_compat_max_vthreads(cpu));
af81cf32 458 sPAPRDRConnector *drc;
af81cf32 459 int drc_index;
c64abd1f
SB
460 uint32_t radix_AP_encodings[PPC_PAGE_SIZES_MAX_SZ];
461 int i;
af81cf32 462
fbf55397 463 drc = spapr_drc_by_id(TYPE_SPAPR_DRC_CPU, index);
af81cf32 464 if (drc) {
0b55aa91 465 drc_index = spapr_drc_index(drc);
af81cf32
BR
466 _FDT((fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index)));
467 }
0da6f3fe
BR
468
469 _FDT((fdt_setprop_cell(fdt, offset, "reg", index)));
470 _FDT((fdt_setprop_string(fdt, offset, "device_type", "cpu")));
471
472 _FDT((fdt_setprop_cell(fdt, offset, "cpu-version", env->spr[SPR_PVR])));
473 _FDT((fdt_setprop_cell(fdt, offset, "d-cache-block-size",
474 env->dcache_line_size)));
475 _FDT((fdt_setprop_cell(fdt, offset, "d-cache-line-size",
476 env->dcache_line_size)));
477 _FDT((fdt_setprop_cell(fdt, offset, "i-cache-block-size",
478 env->icache_line_size)));
479 _FDT((fdt_setprop_cell(fdt, offset, "i-cache-line-size",
480 env->icache_line_size)));
481
482 if (pcc->l1_dcache_size) {
483 _FDT((fdt_setprop_cell(fdt, offset, "d-cache-size",
484 pcc->l1_dcache_size)));
485 } else {
3dc6f869 486 warn_report("Unknown L1 dcache size for cpu");
0da6f3fe
BR
487 }
488 if (pcc->l1_icache_size) {
489 _FDT((fdt_setprop_cell(fdt, offset, "i-cache-size",
490 pcc->l1_icache_size)));
491 } else {
3dc6f869 492 warn_report("Unknown L1 icache size for cpu");
0da6f3fe
BR
493 }
494
495 _FDT((fdt_setprop_cell(fdt, offset, "timebase-frequency", tbfreq)));
496 _FDT((fdt_setprop_cell(fdt, offset, "clock-frequency", cpufreq)));
67d7d66f
DG
497 _FDT((fdt_setprop_cell(fdt, offset, "slb-size", cpu->hash64_opts->slb_size)));
498 _FDT((fdt_setprop_cell(fdt, offset, "ibm,slb-size", cpu->hash64_opts->slb_size)));
0da6f3fe
BR
499 _FDT((fdt_setprop_string(fdt, offset, "status", "okay")));
500 _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0)));
501
502 if (env->spr_cb[SPR_PURR].oea_read) {
503 _FDT((fdt_setprop(fdt, offset, "ibm,purr", NULL, 0)));
504 }
505
58969eee 506 if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)) {
0da6f3fe
BR
507 _FDT((fdt_setprop(fdt, offset, "ibm,processor-segment-sizes",
508 segs, sizeof(segs))));
509 }
510
29386642 511 /* Advertise VSX (vector extensions) if available
0da6f3fe 512 * 1 == VMX / Altivec available
29386642
DG
513 * 2 == VSX available
514 *
515 * Only CPUs for which we create core types in spapr_cpu_core.c
516 * are possible, and all of those have VMX */
4e5fe368 517 if (spapr_get_cap(spapr, SPAPR_CAP_VSX) != 0) {
29386642
DG
518 _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", 2)));
519 } else {
520 _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", 1)));
0da6f3fe
BR
521 }
522
523 /* Advertise DFP (Decimal Floating Point) if available
524 * 0 / no property == no DFP
525 * 1 == DFP available */
4e5fe368 526 if (spapr_get_cap(spapr, SPAPR_CAP_DFP) != 0) {
0da6f3fe
BR
527 _FDT((fdt_setprop_cell(fdt, offset, "ibm,dfp", 1)));
528 }
529
644a2c99
DG
530 page_sizes_prop_size = ppc_create_page_sizes_prop(cpu, page_sizes_prop,
531 sizeof(page_sizes_prop));
0da6f3fe
BR
532 if (page_sizes_prop_size) {
533 _FDT((fdt_setprop(fdt, offset, "ibm,segment-page-sizes",
534 page_sizes_prop, page_sizes_prop_size)));
535 }
536
ee76a09f 537 spapr_populate_pa_features(spapr, cpu, fdt, offset, false);
90da0d5a 538
0da6f3fe 539 _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id",
22419c2a 540 cs->cpu_index / vcpus_per_socket)));
0da6f3fe
BR
541
542 _FDT((fdt_setprop(fdt, offset, "ibm,pft-size",
543 pft_size_prop, sizeof(pft_size_prop))));
544
99861ecb
IM
545 if (nb_numa_nodes > 1) {
546 _FDT(spapr_fixup_cpu_numa_dt(fdt, offset, cpu));
547 }
0da6f3fe 548
12dbeb16 549 _FDT(spapr_fixup_cpu_smt_dt(fdt, offset, cpu, compat_smt));
c64abd1f
SB
550
551 if (pcc->radix_page_info) {
552 for (i = 0; i < pcc->radix_page_info->count; i++) {
553 radix_AP_encodings[i] =
554 cpu_to_be32(pcc->radix_page_info->entries[i]);
555 }
556 _FDT((fdt_setprop(fdt, offset, "ibm,processor-radix-AP-encodings",
557 radix_AP_encodings,
558 pcc->radix_page_info->count *
559 sizeof(radix_AP_encodings[0]))));
560 }
a8dafa52
SJS
561
562 /*
563 * We set this property to let the guest know that it can use the large
564 * decrementer and its width in bits.
565 */
566 if (spapr_get_cap(spapr, SPAPR_CAP_LARGE_DECREMENTER) != SPAPR_CAP_OFF)
567 _FDT((fdt_setprop_u32(fdt, offset, "ibm,dec-bits",
568 pcc->lrg_decr_bits)));
0da6f3fe
BR
569}
570
/*
 * Create the /cpus node of @fdt and one child node per virtual core,
 * each filled in by spapr_populate_cpu_dt().
 */
static void spapr_populate_cpus_dt_node(void *fdt, sPAPRMachineState *spapr)
{
    CPUState **rev;
    CPUState *cs;
    int n_cpus;
    int cpus_offset;
    char *nodename;
    int i;

    cpus_offset = fdt_add_subnode(fdt, 0, "cpus");
    _FDT(cpus_offset);
    _FDT((fdt_setprop_cell(fdt, cpus_offset, "#address-cells", 0x1)));
    _FDT((fdt_setprop_cell(fdt, cpus_offset, "#size-cells", 0x0)));

    /*
     * We walk the CPUs in reverse order to ensure that CPU DT nodes
     * created by fdt_add_subnode() end up in the right order in FDT
     * for the guest kernel to enumerate the CPUs correctly.
     *
     * The CPU list cannot be traversed in reverse order, so we need
     * to do extra work.
     */
    n_cpus = 0;
    rev = NULL;
    CPU_FOREACH(cs) {
        /* Grow the scratch array one slot at a time and append */
        rev = g_renew(CPUState *, rev, n_cpus + 1);
        rev[n_cpus++] = cs;
    }

    for (i = n_cpus - 1; i >= 0; i--) {
        CPUState *cs = rev[i];
        PowerPCCPU *cpu = POWERPC_CPU(cs);
        int index = spapr_get_vcpu_id(cpu);
        DeviceClass *dc = DEVICE_GET_CLASS(cs);
        int offset;

        /* One node per virtual core: skip secondary threads */
        if (!spapr_is_thread0_in_vcore(spapr, cpu)) {
            continue;
        }

        nodename = g_strdup_printf("%s@%x", dc->fw_name, index);
        offset = fdt_add_subnode(fdt, cpus_offset, nodename);
        g_free(nodename);
        _FDT(offset);
        spapr_populate_cpu_dt(cs, fdt, offset, spapr);
    }

    g_free(rev);
}
620
0e947a89
TH
/*
 * Advertise the random number generator: create
 * /ibm,platform-facilities with an "ibm,random-v1" child node.
 *
 * Returns 0 on success, -1 on any failure.
 */
static int spapr_rng_populate_dt(void *fdt)
{
    int node;
    int ret;

    node = qemu_fdt_add_subnode(fdt, "/ibm,platform-facilities");
    if (node <= 0) {
        return -1;
    }
    /* Errors are OR-ed together and collapsed to -1 at the end */
    ret = fdt_setprop_string(fdt, node, "device_type",
                             "ibm,platform-facilities");
    ret |= fdt_setprop_cell(fdt, node, "#address-cells", 0x1);
    ret |= fdt_setprop_cell(fdt, node, "#size-cells", 0x0);

    node = fdt_add_subnode(fdt, node, "ibm,random-v1");
    if (node <= 0) {
        return -1;
    }
    ret |= fdt_setprop_string(fdt, node, "compatible", "ibm,random");

    return ret ? -1 : 0;
}
643
f47bd1c8
IM
644static uint32_t spapr_pc_dimm_node(MemoryDeviceInfoList *list, ram_addr_t addr)
645{
646 MemoryDeviceInfoList *info;
647
648 for (info = list; info; info = info->next) {
649 MemoryDeviceInfo *value = info->value;
650
651 if (value && value->type == MEMORY_DEVICE_INFO_KIND_DIMM) {
652 PCDIMMDeviceInfo *pcdimm_info = value->u.dimm.data;
653
ccc2cef8 654 if (addr >= pcdimm_info->addr &&
f47bd1c8
IM
655 addr < (pcdimm_info->addr + pcdimm_info->size)) {
656 return pcdimm_info->node;
657 }
658 }
659 }
660
661 return -1;
662}
663
a324d6f1
BR
/*
 * On-the-wire layout of one "ibm,dynamic-memory-v2" cell: a run of
 * @seq_lmbs consecutive LMBs starting at @base_addr that share the same
 * DRC index base, associativity index and flags. Fields are stored
 * big-endian (see spapr_get_drconf_cell()), hence QEMU_PACKED.
 */
struct sPAPRDrconfCellV2 {
    uint32_t seq_lmbs;
    uint32_t base_addr;
    uint32_t drc_index;
    uint32_t aa_index;
    uint32_t flags;
} QEMU_PACKED;

/* Queue element used to accumulate cells before writing the property */
typedef struct DrconfCellQueue {
    struct sPAPRDrconfCellV2 cell;
    QSIMPLEQ_ENTRY(DrconfCellQueue) entry;
} DrconfCellQueue;
676
677static DrconfCellQueue *
678spapr_get_drconf_cell(uint32_t seq_lmbs, uint64_t base_addr,
679 uint32_t drc_index, uint32_t aa_index,
680 uint32_t flags)
03d196b7 681{
a324d6f1
BR
682 DrconfCellQueue *elem;
683
684 elem = g_malloc0(sizeof(*elem));
685 elem->cell.seq_lmbs = cpu_to_be32(seq_lmbs);
686 elem->cell.base_addr = cpu_to_be64(base_addr);
687 elem->cell.drc_index = cpu_to_be32(drc_index);
688 elem->cell.aa_index = cpu_to_be32(aa_index);
689 elem->cell.flags = cpu_to_be32(flags);
690
691 return elem;
692}
693
/* ibm,dynamic-memory-v2 */
/*
 * Build the compact v2 encoding: one cell per contiguous run of LMBs
 * with identical attributes — boot RAM (reserved), each plugged DIMM
 * (assigned) and the hot-pluggable gaps in between.
 *
 * Returns 0 on success, -1 if the property could not be written.
 */
static int spapr_populate_drmem_v2(sPAPRMachineState *spapr, void *fdt,
                                   int offset, MemoryDeviceInfoList *dimms)
{
    MachineState *machine = MACHINE(spapr);
    uint8_t *int_buf, *cur_index;
    int ret;
    uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE;
    uint64_t addr, cur_addr, size;
    uint32_t nr_boot_lmbs = (machine->device_memory->base / lmb_size);
    uint64_t mem_end = machine->device_memory->base +
                       memory_region_size(&machine->device_memory->mr);
    uint32_t node, buf_len, nr_entries = 0;
    sPAPRDRConnector *drc;
    DrconfCellQueue *elem, *next;
    MemoryDeviceInfoList *info;
    QSIMPLEQ_HEAD(, DrconfCellQueue) drconf_queue
        = QSIMPLEQ_HEAD_INITIALIZER(drconf_queue);

    /* Entry to cover RAM and the gap area */
    elem = spapr_get_drconf_cell(nr_boot_lmbs, 0, 0, -1,
                                 SPAPR_LMB_FLAGS_RESERVED |
                                 SPAPR_LMB_FLAGS_DRC_INVALID);
    QSIMPLEQ_INSERT_TAIL(&drconf_queue, elem, entry);
    nr_entries++;

    /* Walk the device memory area; dimms is assumed sorted by address —
     * TODO confirm against qmp_memory_device_list() */
    cur_addr = machine->device_memory->base;
    for (info = dimms; info; info = info->next) {
        PCDIMMDeviceInfo *di = info->value->u.dimm.data;

        addr = di->addr;
        size = di->size;
        node = di->node;

        /* Entry for hot-pluggable area */
        if (cur_addr < addr) {
            drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB, cur_addr / lmb_size);
            g_assert(drc);
            elem = spapr_get_drconf_cell((addr - cur_addr) / lmb_size,
                                         cur_addr, spapr_drc_index(drc), -1, 0);
            QSIMPLEQ_INSERT_TAIL(&drconf_queue, elem, entry);
            nr_entries++;
        }

        /* Entry for DIMM */
        drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB, addr / lmb_size);
        g_assert(drc);
        elem = spapr_get_drconf_cell(size / lmb_size, addr,
                                     spapr_drc_index(drc), node,
                                     SPAPR_LMB_FLAGS_ASSIGNED);
        QSIMPLEQ_INSERT_TAIL(&drconf_queue, elem, entry);
        nr_entries++;
        cur_addr = addr + size;
    }

    /* Entry for remaining hotpluggable area */
    if (cur_addr < mem_end) {
        drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB, cur_addr / lmb_size);
        g_assert(drc);
        elem = spapr_get_drconf_cell((mem_end - cur_addr) / lmb_size,
                                     cur_addr, spapr_drc_index(drc), -1, 0);
        QSIMPLEQ_INSERT_TAIL(&drconf_queue, elem, entry);
        nr_entries++;
    }

    /* Serialize: u32 entry count followed by the packed cells */
    buf_len = nr_entries * sizeof(struct sPAPRDrconfCellV2) + sizeof(uint32_t);
    int_buf = cur_index = g_malloc0(buf_len);
    *(uint32_t *)int_buf = cpu_to_be32(nr_entries);
    cur_index += sizeof(nr_entries);

    QSIMPLEQ_FOREACH_SAFE(elem, &drconf_queue, entry, next) {
        memcpy(cur_index, &elem->cell, sizeof(elem->cell));
        cur_index += sizeof(elem->cell);
        QSIMPLEQ_REMOVE(&drconf_queue, elem, DrconfCellQueue, entry);
        g_free(elem);
    }

    ret = fdt_setprop(fdt, offset, "ibm,dynamic-memory-v2", int_buf, buf_len);
    g_free(int_buf);
    if (ret < 0) {
        return -1;
    }
    return 0;
}
778
/* ibm,dynamic-memory */
/*
 * Build the legacy v1 encoding: one fixed-size entry per LMB covering
 * everything up to the end of the device memory region. LMBs below the
 * device memory base are reserved with no DRC; those above get a DRC
 * index, NUMA node and an assigned/unassigned flag.
 *
 * Returns 0 on success, -1 if the property could not be written.
 */
static int spapr_populate_drmem_v1(sPAPRMachineState *spapr, void *fdt,
                                   int offset, MemoryDeviceInfoList *dimms)
{
    MachineState *machine = MACHINE(spapr);
    int i, ret;
    uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE;
    uint32_t device_lmb_start = machine->device_memory->base / lmb_size;
    uint32_t nr_lmbs = (machine->device_memory->base +
                       memory_region_size(&machine->device_memory->mr)) /
                       lmb_size;
    uint32_t *int_buf, *cur_index, buf_len;

    /*
     * Allocate enough buffer size to fit in ibm,dynamic-memory
     */
    buf_len = (nr_lmbs * SPAPR_DR_LMB_LIST_ENTRY_SIZE + 1) * sizeof(uint32_t);
    cur_index = int_buf = g_malloc0(buf_len);
    int_buf[0] = cpu_to_be32(nr_lmbs);
    cur_index++;
    for (i = 0; i < nr_lmbs; i++) {
        uint64_t addr = i * lmb_size;
        uint32_t *dynamic_memory = cur_index;

        if (i >= device_lmb_start) {
            sPAPRDRConnector *drc;

            drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB, i);
            g_assert(drc);

            /* Entry layout: addr-hi, addr-lo, drc, reserved, node, flags */
            dynamic_memory[0] = cpu_to_be32(addr >> 32);
            dynamic_memory[1] = cpu_to_be32(addr & 0xffffffff);
            dynamic_memory[2] = cpu_to_be32(spapr_drc_index(drc));
            dynamic_memory[3] = cpu_to_be32(0); /* reserved */
            dynamic_memory[4] = cpu_to_be32(spapr_pc_dimm_node(dimms, addr));
            if (memory_region_present(get_system_memory(), addr)) {
                dynamic_memory[5] = cpu_to_be32(SPAPR_LMB_FLAGS_ASSIGNED);
            } else {
                dynamic_memory[5] = cpu_to_be32(0);
            }
        } else {
            /*
             * LMB information for RMA, boot time RAM and gap b/n RAM and
             * device memory region -- all these are marked as reserved
             * and as having no valid DRC.
             */
            dynamic_memory[0] = cpu_to_be32(addr >> 32);
            dynamic_memory[1] = cpu_to_be32(addr & 0xffffffff);
            dynamic_memory[2] = cpu_to_be32(0);
            dynamic_memory[3] = cpu_to_be32(0); /* reserved */
            dynamic_memory[4] = cpu_to_be32(-1);
            dynamic_memory[5] = cpu_to_be32(SPAPR_LMB_FLAGS_RESERVED |
                                            SPAPR_LMB_FLAGS_DRC_INVALID);
        }

        cur_index += SPAPR_DR_LMB_LIST_ENTRY_SIZE;
    }
    ret = fdt_setprop(fdt, offset, "ibm,dynamic-memory", int_buf, buf_len);
    g_free(int_buf);
    if (ret < 0) {
        return -1;
    }
    return 0;
}
843
/*
 * Adds ibm,dynamic-reconfiguration-memory node.
 * Refer to docs/specs/ppc-spapr-hotplug.txt for the documentation
 * of this device tree node.
 *
 * Returns 0 on success (or when there is no device memory to describe),
 * a negative error code otherwise.
 */
static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt)
{
    MachineState *machine = MACHINE(spapr);
    int ret, i, offset;
    uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE;
    uint32_t prop_lmb_size[] = {0, cpu_to_be32(lmb_size)};
    uint32_t *int_buf, *cur_index, buf_len;
    int nr_nodes = nb_numa_nodes ? nb_numa_nodes : 1;
    MemoryDeviceInfoList *dimms = NULL;

    /*
     * Don't create the node if there is no device memory
     */
    if (machine->ram_size == machine->maxram_size) {
        return 0;
    }

    offset = fdt_add_subnode(fdt, 0, "ibm,dynamic-reconfiguration-memory");

    ret = fdt_setprop(fdt, offset, "ibm,lmb-size", prop_lmb_size,
                      sizeof(prop_lmb_size));
    if (ret < 0) {
        return ret;
    }

    ret = fdt_setprop_cell(fdt, offset, "ibm,memory-flags-mask", 0xff);
    if (ret < 0) {
        return ret;
    }

    ret = fdt_setprop_cell(fdt, offset, "ibm,memory-preservation-time", 0x0);
    if (ret < 0) {
        return ret;
    }

    /* ibm,dynamic-memory or ibm,dynamic-memory-v2 */
    dimms = qmp_memory_device_list();
    if (spapr_ovec_test(spapr->ov5_cas, OV5_DRMEM_V2)) {
        /* Guest negotiated the compact v2 encoding via CAS */
        ret = spapr_populate_drmem_v2(spapr, fdt, offset, dimms);
    } else {
        ret = spapr_populate_drmem_v1(spapr, fdt, offset, dimms);
    }
    qapi_free_MemoryDeviceInfoList(dimms);

    if (ret < 0) {
        return ret;
    }

    /* ibm,associativity-lookup-arrays */
    buf_len = (nr_nodes * 4 + 2) * sizeof(uint32_t);
    cur_index = int_buf = g_malloc0(buf_len);
    int_buf[0] = cpu_to_be32(nr_nodes);
    int_buf[1] = cpu_to_be32(4); /* Number of entries per associativity list */
    cur_index += 2;
    for (i = 0; i < nr_nodes; i++) {
        /* One 4-cell list per node; only the last cell (node id) is used */
        uint32_t associativity[] = {
            cpu_to_be32(0x0),
            cpu_to_be32(0x0),
            cpu_to_be32(0x0),
            cpu_to_be32(i)
        };
        memcpy(cur_index, associativity, sizeof(associativity));
        cur_index += 4;
    }
    ret = fdt_setprop(fdt, offset, "ibm,associativity-lookup-arrays", int_buf,
                      (cur_index - int_buf) * sizeof(uint32_t));
    g_free(int_buf);

    return ret;
}
919
6787d27b
MR
920static int spapr_dt_cas_updates(sPAPRMachineState *spapr, void *fdt,
921 sPAPROptionVector *ov5_updates)
922{
923 sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
417ece33 924 int ret = 0, offset;
6787d27b
MR
925
926 /* Generate ibm,dynamic-reconfiguration-memory node if required */
927 if (spapr_ovec_test(ov5_updates, OV5_DRCONF_MEMORY)) {
928 g_assert(smc->dr_lmb_enabled);
929 ret = spapr_populate_drconf_memory(spapr, fdt);
417ece33
MR
930 if (ret) {
931 goto out;
932 }
6787d27b
MR
933 }
934
417ece33
MR
935 offset = fdt_path_offset(fdt, "/chosen");
936 if (offset < 0) {
937 offset = fdt_add_subnode(fdt, 0, "chosen");
938 if (offset < 0) {
939 return offset;
940 }
941 }
942 ret = spapr_ovec_populate_dt(fdt, offset, spapr->ov5_cas,
943 "ibm,architecture-vec-5");
944
945out:
6787d27b
MR
946 return ret;
947}
948
10f12e64
DHB
949static bool spapr_hotplugged_dev_before_cas(void)
950{
951 Object *drc_container, *obj;
952 ObjectProperty *prop;
953 ObjectPropertyIterator iter;
954
955 drc_container = container_get(object_get_root(), "/dr-connector");
956 object_property_iter_init(&iter, drc_container);
957 while ((prop = object_property_iter_next(&iter))) {
958 if (!strstart(prop->type, "link<", NULL)) {
959 continue;
960 }
961 obj = object_property_get_link(drc_container, prop->name, NULL);
962 if (spapr_drc_needed(obj)) {
963 return true;
964 }
965 }
966 return false;
967}
968
03d196b7
BR
969int spapr_h_cas_compose_response(sPAPRMachineState *spapr,
970 target_ulong addr, target_ulong size,
6787d27b 971 sPAPROptionVector *ov5_updates)
03d196b7
BR
972{
973 void *fdt, *fdt_skel;
974 sPAPRDeviceTreeUpdateHeader hdr = { .version_id = 1 };
03d196b7 975
10f12e64
DHB
976 if (spapr_hotplugged_dev_before_cas()) {
977 return 1;
978 }
979
827b17c4
GK
980 if (size < sizeof(hdr) || size > FW_MAX_SIZE) {
981 error_report("SLOF provided an unexpected CAS buffer size "
982 TARGET_FMT_lu " (min: %zu, max: %u)",
983 size, sizeof(hdr), FW_MAX_SIZE);
984 exit(EXIT_FAILURE);
985 }
986
03d196b7
BR
987 size -= sizeof(hdr);
988
10f12e64 989 /* Create skeleton */
03d196b7
BR
990 fdt_skel = g_malloc0(size);
991 _FDT((fdt_create(fdt_skel, size)));
127f03e4 992 _FDT((fdt_finish_reservemap(fdt_skel)));
03d196b7
BR
993 _FDT((fdt_begin_node(fdt_skel, "")));
994 _FDT((fdt_end_node(fdt_skel)));
995 _FDT((fdt_finish(fdt_skel)));
996 fdt = g_malloc0(size);
997 _FDT((fdt_open_into(fdt_skel, fdt, size)));
998 g_free(fdt_skel);
999
1000 /* Fixup cpu nodes */
5b120785 1001 _FDT((spapr_fixup_cpu_dt(fdt, spapr)));
03d196b7 1002
6787d27b
MR
1003 if (spapr_dt_cas_updates(spapr, fdt, ov5_updates)) {
1004 return -1;
03d196b7
BR
1005 }
1006
1007 /* Pack resulting tree */
1008 _FDT((fdt_pack(fdt)));
1009
1010 if (fdt_totalsize(fdt) + sizeof(hdr) > size) {
1011 trace_spapr_cas_failed(size);
1012 return -1;
1013 }
1014
1015 cpu_physical_memory_write(addr, &hdr, sizeof(hdr));
1016 cpu_physical_memory_write(addr + sizeof(hdr), fdt, fdt_totalsize(fdt));
1017 trace_spapr_cas_continue(fdt_totalsize(fdt) + sizeof(hdr));
1018 g_free(fdt);
1019
1020 return 0;
1021}
1022
/*
 * Build the /rtas node: advertised hypertas function sets, NUMA
 * associativity information, hotplug capacity limits (ibm,lrdr-capacity)
 * and the RTAS call tokens.
 */
static void spapr_dt_rtas(sPAPRMachineState *spapr, void *fdt)
{
    int rtas;
    GString *hypertas = g_string_sized_new(256);
    GString *qemu_hypertas = g_string_sized_new(256);
    /* Two associativity reference points, both pointing at domain 0x4 */
    uint32_t refpoints[] = { cpu_to_be32(0x4), cpu_to_be32(0x4) };
    /* End of the device (hotplug) memory region: the maximum address the
     * guest may ever see, advertised via ibm,lrdr-capacity below */
    uint64_t max_device_addr = MACHINE(spapr)->device_memory->base +
        memory_region_size(&MACHINE(spapr)->device_memory->mr);
    uint32_t lrdr_capacity[] = {
        cpu_to_be32(max_device_addr >> 32),
        cpu_to_be32(max_device_addr & 0xffffffff),
        0, cpu_to_be32(SPAPR_MEMORY_BLOCK_SIZE),
        cpu_to_be32(max_cpus / smp_threads),
    };
    /* ibm,max-associativity-domains: 4 levels; only the last (NUMA node
     * count) is non-trivial here */
    uint32_t maxdomains[] = {
        cpu_to_be32(4),
        cpu_to_be32(0),
        cpu_to_be32(0),
        cpu_to_be32(0),
        cpu_to_be32(nb_numa_nodes ? nb_numa_nodes : 1),
    };

    _FDT(rtas = fdt_add_subnode(fdt, 0, "rtas"));

    /* hypertas */
    add_str(hypertas, "hcall-pft");
    add_str(hypertas, "hcall-term");
    add_str(hypertas, "hcall-dabr");
    add_str(hypertas, "hcall-interrupt");
    add_str(hypertas, "hcall-tce");
    add_str(hypertas, "hcall-vio");
    add_str(hypertas, "hcall-splpar");
    add_str(hypertas, "hcall-bulk");
    add_str(hypertas, "hcall-set-mode");
    add_str(hypertas, "hcall-sprg0");
    add_str(hypertas, "hcall-copy");
    add_str(hypertas, "hcall-debug");
    add_str(hypertas, "hcall-vphn");
    /* QEMU-private extensions go into a separate property */
    add_str(qemu_hypertas, "hcall-memop1");

    if (!kvm_enabled() || kvmppc_spapr_use_multitce()) {
        add_str(hypertas, "hcall-multi-tce");
    }

    if (spapr->resize_hpt != SPAPR_RESIZE_HPT_DISABLED) {
        add_str(hypertas, "hcall-hpt-resize");
    }

    _FDT(fdt_setprop(fdt, rtas, "ibm,hypertas-functions",
                     hypertas->str, hypertas->len));
    g_string_free(hypertas, TRUE);
    _FDT(fdt_setprop(fdt, rtas, "qemu,hypertas-functions",
                     qemu_hypertas->str, qemu_hypertas->len));
    g_string_free(qemu_hypertas, TRUE);

    _FDT(fdt_setprop(fdt, rtas, "ibm,associativity-reference-points",
                     refpoints, sizeof(refpoints)));

    _FDT(fdt_setprop(fdt, rtas, "ibm,max-associativity-domains",
                     maxdomains, sizeof(maxdomains)));

    _FDT(fdt_setprop_cell(fdt, rtas, "rtas-error-log-max",
                          RTAS_ERROR_LOG_MAX));
    _FDT(fdt_setprop_cell(fdt, rtas, "rtas-event-scan-rate",
                          RTAS_EVENT_SCAN_RATE));

    g_assert(msi_nonbroken);
    _FDT(fdt_setprop(fdt, rtas, "ibm,change-msix-capable", NULL, 0));

    /*
     * According to PAPR, rtas ibm,os-term does not guarantee a return
     * back to the guest cpu.
     *
     * While an additional ibm,extended-os-term property indicates
     * that rtas call return will always occur. Set this property.
     */
    _FDT(fdt_setprop(fdt, rtas, "ibm,extended-os-term", NULL, 0));

    _FDT(fdt_setprop(fdt, rtas, "ibm,lrdr-capacity",
                     lrdr_capacity, sizeof(lrdr_capacity)));

    spapr_dt_rtas_tokens(fdt, rtas);
}
1106
/*
 * Prepare ibm,arch-vec-5-platform-support, which indicates the MMU
 * and the XIVE features that the guest may request and thus the valid
 * values for bytes 23..26 of option vector 5:
 *
 * The property is a list of (byte-number, supported-values) pairs:
 * val[1] holds byte 23 (interrupt mode), val[3] byte 24 (MMU model).
 */
static void spapr_dt_ov5_platform_support(sPAPRMachineState *spapr, void *fdt,
                                          int chosen)
{
    PowerPCCPU *first_ppc_cpu = POWERPC_CPU(first_cpu);

    char val[2 * 4] = {
        23, spapr->irq->ov5, /* Xive mode. */
        24, 0x00, /* Hash/Radix, filled in below. */
        25, 0x00, /* Hash options: Segment Tables == no, GTSE == no. */
        26, 0x40, /* Radix options: GTSE == yes. */
    };

    if (!ppc_check_compat(first_ppc_cpu, CPU_POWERPC_LOGICAL_3_00, 0,
                          first_ppc_cpu->compat_pvr)) {
        /*
         * If we're in a pre POWER9 compat mode then the guest should
         * do hash and use the legacy interrupt mode
         */
        val[1] = 0x00; /* XICS */
        val[3] = 0x00; /* Hash */
    } else if (kvm_enabled()) {
        /* Advertise exactly what the host kernel can virtualize */
        if (kvmppc_has_cap_mmu_radix() && kvmppc_has_cap_mmu_hash_v3()) {
            val[3] = 0x80; /* OV5_MMU_BOTH */
        } else if (kvmppc_has_cap_mmu_radix()) {
            val[3] = 0x40; /* OV5_MMU_RADIX_300 */
        } else {
            val[3] = 0x00; /* Hash */
        }
    } else {
        /* V3 MMU supports both hash and radix in tcg (with dynamic switching) */
        val[3] = 0xC0;
    }
    _FDT(fdt_setprop(fdt, chosen, "ibm,arch-vec-5-platform-support",
                     val, sizeof(val)));
}
1147
/*
 * Build /chosen: kernel command line, initrd location, boot order and
 * boot list, graphics hints, console path and the OV5 platform-support
 * property.
 */
static void spapr_dt_chosen(sPAPRMachineState *spapr, void *fdt)
{
    MachineState *machine = MACHINE(spapr);
    int chosen;
    const char *boot_device = machine->boot_order;
    char *stdout_path = spapr_vio_stdout_path(spapr->vio_bus);
    size_t cb = 0;
    char *bootlist = get_boot_devices_list(&cb);

    _FDT(chosen = fdt_add_subnode(fdt, 0, "chosen"));

    _FDT(fdt_setprop_string(fdt, chosen, "bootargs", machine->kernel_cmdline));
    _FDT(fdt_setprop_cell(fdt, chosen, "linux,initrd-start",
                          spapr->initrd_base));
    _FDT(fdt_setprop_cell(fdt, chosen, "linux,initrd-end",
                          spapr->initrd_base + spapr->initrd_size));

    if (spapr->kernel_size) {
        /* Tell the firmware where -kernel was loaded and how big it is */
        uint64_t kprop[2] = { cpu_to_be64(KERNEL_LOAD_ADDR),
                              cpu_to_be64(spapr->kernel_size) };

        _FDT(fdt_setprop(fdt, chosen, "qemu,boot-kernel",
                         &kprop, sizeof(kprop)));
        if (spapr->kernel_le) {
            _FDT(fdt_setprop(fdt, chosen, "qemu,boot-kernel-le", NULL, 0));
        }
    }
    if (boot_menu) {
        _FDT((fdt_setprop_cell(fdt, chosen, "qemu,boot-menu", boot_menu)));
    }
    _FDT(fdt_setprop_cell(fdt, chosen, "qemu,graphic-width", graphic_width));
    _FDT(fdt_setprop_cell(fdt, chosen, "qemu,graphic-height", graphic_height));
    _FDT(fdt_setprop_cell(fdt, chosen, "qemu,graphic-depth", graphic_depth));

    if (cb && bootlist) {
        int i;

        /* The boot list is newline-separated; the property wants spaces */
        for (i = 0; i < cb; i++) {
            if (bootlist[i] == '\n') {
                bootlist[i] = ' ';
            }
        }
        _FDT(fdt_setprop_string(fdt, chosen, "qemu,boot-list", bootlist));
    }

    if (boot_device && strlen(boot_device)) {
        _FDT(fdt_setprop_string(fdt, chosen, "qemu,boot-device", boot_device));
    }

    if (!spapr->has_graphics && stdout_path) {
        /*
         * "linux,stdout-path" and "stdout" properties are deprecated by linux
         * kernel. New platforms should only use the "stdout-path" property. Set
         * the new property and continue using older property to remain
         * compatible with the existing firmware.
         */
        _FDT(fdt_setprop_string(fdt, chosen, "linux,stdout-path", stdout_path));
        _FDT(fdt_setprop_string(fdt, chosen, "stdout-path", stdout_path));
    }

    spapr_dt_ov5_platform_support(spapr, fdt, chosen);

    g_free(stdout_path);
    g_free(bootlist);
}
1213
fca5f2dc
DG
1214static void spapr_dt_hypervisor(sPAPRMachineState *spapr, void *fdt)
1215{
1216 /* The /hypervisor node isn't in PAPR - this is a hack to allow PR
1217 * KVM to work under pHyp with some guest co-operation */
1218 int hypervisor;
1219 uint8_t hypercall[16];
1220
1221 _FDT(hypervisor = fdt_add_subnode(fdt, 0, "hypervisor"));
1222 /* indicate KVM hypercall interface */
1223 _FDT(fdt_setprop_string(fdt, hypervisor, "compatible", "linux,kvm"));
1224 if (kvmppc_has_cap_fixup_hcalls()) {
1225 /*
1226 * Older KVM versions with older guest kernels were broken
1227 * with the magic page, don't allow the guest to map it.
1228 */
1229 if (!kvmppc_get_hypercall(first_cpu->env_ptr, hypercall,
1230 sizeof(hypercall))) {
1231 _FDT(fdt_setprop(fdt, hypervisor, "hcall-instructions",
1232 hypercall, sizeof(hypercall)));
1233 }
1234 }
1235}
1236
/*
 * Construct the complete guest device tree at machine reset time.
 * Exits QEMU on any failure, since the guest cannot boot without a
 * valid tree.  Returns a g_malloc'd buffer owned by the caller.
 */
static void *spapr_build_fdt(sPAPRMachineState *spapr)
{
    MachineState *machine = MACHINE(spapr);
    MachineClass *mc = MACHINE_GET_CLASS(machine);
    sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine);
    int ret;
    void *fdt;
    sPAPRPHBState *phb;
    char *buf;

    fdt = g_malloc0(FDT_MAX_SIZE);
    _FDT((fdt_create_empty_tree(fdt, FDT_MAX_SIZE)));

    /* Root node */
    _FDT(fdt_setprop_string(fdt, 0, "device_type", "chrp"));
    _FDT(fdt_setprop_string(fdt, 0, "model", "IBM pSeries (emulated by qemu)"));
    _FDT(fdt_setprop_string(fdt, 0, "compatible", "qemu,pseries"));

    /*
     * Add info to guest to indentify which host is it being run on
     * and what is the uuid of the guest
     */
    if (spapr->host_model && !g_str_equal(spapr->host_model, "none")) {
        if (g_str_equal(spapr->host_model, "passthrough")) {
            /* -M host-model=passthrough */
            if (kvmppc_get_host_model(&buf)) {
                _FDT(fdt_setprop_string(fdt, 0, "host-model", buf));
                g_free(buf);
            }
        } else {
            /* -M host-model=<user-string> */
            _FDT(fdt_setprop_string(fdt, 0, "host-model", spapr->host_model));
        }
    }

    if (spapr->host_serial && !g_str_equal(spapr->host_serial, "none")) {
        if (g_str_equal(spapr->host_serial, "passthrough")) {
            /* -M host-serial=passthrough */
            if (kvmppc_get_host_serial(&buf)) {
                _FDT(fdt_setprop_string(fdt, 0, "host-serial", buf));
                g_free(buf);
            }
        } else {
            /* -M host-serial=<user-string> */
            _FDT(fdt_setprop_string(fdt, 0, "host-serial", spapr->host_serial));
        }
    }

    buf = qemu_uuid_unparse_strdup(&qemu_uuid);

    _FDT(fdt_setprop_string(fdt, 0, "vm,uuid", buf));
    if (qemu_uuid_set) {
        _FDT(fdt_setprop_string(fdt, 0, "system-id", buf));
    }
    g_free(buf);

    if (qemu_get_vm_name()) {
        _FDT(fdt_setprop_string(fdt, 0, "ibm,partition-name",
                                qemu_get_vm_name()));
    }

    _FDT(fdt_setprop_cell(fdt, 0, "#address-cells", 2));
    _FDT(fdt_setprop_cell(fdt, 0, "#size-cells", 2));

    /* /interrupt controller */
    spapr->irq->dt_populate(spapr, spapr_max_server_number(spapr), fdt,
                            PHANDLE_INTC);

    ret = spapr_populate_memory(spapr, fdt);
    if (ret < 0) {
        error_report("couldn't setup memory nodes in fdt");
        exit(1);
    }

    /* /vdevice */
    spapr_dt_vdevice(spapr->vio_bus, fdt);

    if (object_resolve_path_type("", TYPE_SPAPR_RNG, NULL)) {
        ret = spapr_rng_populate_dt(fdt);
        if (ret < 0) {
            error_report("could not set up rng device in the fdt");
            exit(1);
        }
    }

    /* One PCI node per PHB, all wired to the same interrupt controller */
    QLIST_FOREACH(phb, &spapr->phbs, list) {
        ret = spapr_populate_pci_dt(phb, PHANDLE_INTC, fdt,
                                    spapr->irq->nr_msis, NULL);
        if (ret < 0) {
            error_report("couldn't setup PCI devices in fdt");
            exit(1);
        }
    }

    /* cpus */
    spapr_populate_cpus_dt_node(fdt, spapr);

    if (smc->dr_lmb_enabled) {
        /* DR connectors for memory hotplug (LMBs) */
        _FDT(spapr_drc_populate_dt(fdt, 0, NULL, SPAPR_DR_CONNECTOR_TYPE_LMB));
    }

    if (mc->has_hotpluggable_cpus) {
        /* DR connectors for CPU hotplug, parented to /cpus */
        int offset = fdt_path_offset(fdt, "/cpus");
        ret = spapr_drc_populate_dt(fdt, offset, NULL,
                                    SPAPR_DR_CONNECTOR_TYPE_CPU);
        if (ret < 0) {
            error_report("Couldn't set up CPU DR device tree properties");
            exit(1);
        }
    }

    /* /event-sources */
    spapr_dt_events(spapr, fdt);

    /* /rtas */
    spapr_dt_rtas(spapr, fdt);

    /* /chosen */
    spapr_dt_chosen(spapr, fdt);

    /* /hypervisor */
    if (kvm_enabled()) {
        spapr_dt_hypervisor(spapr, fdt);
    }

    /* Build memory reserve map */
    if (spapr->kernel_size) {
        _FDT((fdt_add_mem_rsv(fdt, KERNEL_LOAD_ADDR, spapr->kernel_size)));
    }
    if (spapr->initrd_size) {
        _FDT((fdt_add_mem_rsv(fdt, spapr->initrd_base, spapr->initrd_size)));
    }

    /* ibm,client-architecture-support updates */
    ret = spapr_dt_cas_updates(spapr, fdt, spapr->ov5_cas);
    if (ret < 0) {
        error_report("couldn't setup CAS properties fdt");
        exit(1);
    }

    if (smc->dr_phb_enabled) {
        /* DR connectors for PHB hotplug */
        ret = spapr_drc_populate_dt(fdt, 0, NULL, SPAPR_DR_CONNECTOR_TYPE_PHB);
        if (ret < 0) {
            error_report("Couldn't set up PHB DR device tree properties");
            exit(1);
        }
    }

    return fdt;
}
1387
1388static uint64_t translate_kernel_address(void *opaque, uint64_t addr)
1389{
1390 return (addr & 0x0fffffff) + KERNEL_LOAD_ADDR;
1391}
1392
/*
 * PPCVirtualHypervisor hook: dispatch a guest hypercall in TCG.
 * r3 carries the hcall number on entry and the result on exit;
 * arguments start at r4.
 */
static void emulate_spapr_hypercall(PPCVirtualHypervisor *vhyp,
                                    PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;

    /* The TCG path should also be holding the BQL at this point */
    g_assert(qemu_mutex_iothread_locked());

    /* Hypercalls are privileged: refuse calls made from problem state */
    if (msr_pr) {
        hcall_dprintf("Hypercall made with MSR[PR]=1\n");
        env->gpr[3] = H_PRIVILEGE;
    } else {
        env->gpr[3] = spapr_hypercall(cpu, env->gpr[3], &env->gpr[4]);
    }
}
1408
/* Argument block for do_lpcr_sync(): on each CPU the bits selected by
 * @mask are cleared in LPCR and replaced by @value. */
struct LPCRSyncState {
    target_ulong value;
    target_ulong mask;
};
1413
1414static void do_lpcr_sync(CPUState *cs, run_on_cpu_data arg)
1415{
1416 struct LPCRSyncState *s = arg.host_ptr;
1417 PowerPCCPU *cpu = POWERPC_CPU(cs);
1418 CPUPPCState *env = &cpu->env;
1419 target_ulong lpcr;
1420
1421 cpu_synchronize_state(cs);
1422 lpcr = env->spr[SPR_LPCR];
1423 lpcr &= ~s->mask;
1424 lpcr |= s->value;
1425 ppc_store_lpcr(cpu, lpcr);
1426}
1427
1428void spapr_set_all_lpcrs(target_ulong value, target_ulong mask)
1429{
1430 CPUState *cs;
1431 struct LPCRSyncState s = {
1432 .value = value,
1433 .mask = mask
1434 };
1435 CPU_FOREACH(cs) {
1436 run_on_cpu(cs, do_lpcr_sync, RUN_ON_CPU_HOST_PTR(&s));
1437 }
1438}
1439
/*
 * PPCVirtualHypervisor hook: return the (single) partition table entry.
 * sPAPR stores only dw1 in spapr->patb_entry; dw0 is synthesized from it.
 */
static void spapr_get_pate(PPCVirtualHypervisor *vhyp, ppc_v3_pate_t *entry)
{
    sPAPRMachineState *spapr = SPAPR_MACHINE(vhyp);

    /* Copy PATE1:GR into PATE0:HR */
    /* NOTE(review): relies on PATE0_HR and PATE1_GR occupying the same
     * bit position — confirm against the PATE bit definitions */
    entry->dw0 = spapr->patb_entry & PATE0_HR;
    entry->dw1 = spapr->patb_entry;
}
1448
/*
 * Accessors for the QEMU-managed hash page table: each HPTE is a pair of
 * 64-bit words, stored target-endian (hence the tswap64()).  HPTE() yields
 * the address of entry _i; the DIRTY bit is a software flag — presumably
 * tracking entries not yet transferred by HTAB migration, see the htab
 * save/load code to confirm.
 */
#define HPTE(_table, _i)   (void *)(((uint64_t *)(_table)) + ((_i) * 2))
#define HPTE_VALID(_hpte)  (tswap64(*((uint64_t *)(_hpte))) & HPTE64_V_VALID)
#define HPTE_DIRTY(_hpte)  (tswap64(*((uint64_t *)(_hpte))) & HPTE64_V_HPTE_DIRTY)
#define CLEAN_HPTE(_hpte)  ((*(uint64_t *)(_hpte)) &= tswap64(~HPTE64_V_HPTE_DIRTY))
#define DIRTY_HPTE(_hpte)  ((*(uint64_t *)(_hpte)) |= tswap64(HPTE64_V_HPTE_DIRTY))
1454
715c5407
DG
1455/*
1456 * Get the fd to access the kernel htab, re-opening it if necessary
1457 */
1458static int get_htab_fd(sPAPRMachineState *spapr)
1459{
14b0d748
GK
1460 Error *local_err = NULL;
1461
715c5407
DG
1462 if (spapr->htab_fd >= 0) {
1463 return spapr->htab_fd;
1464 }
1465
14b0d748 1466 spapr->htab_fd = kvmppc_get_htab_fd(false, 0, &local_err);
715c5407 1467 if (spapr->htab_fd < 0) {
14b0d748 1468 error_report_err(local_err);
715c5407
DG
1469 }
1470
1471 return spapr->htab_fd;
1472}
1473
b4db5413 1474void close_htab_fd(sPAPRMachineState *spapr)
715c5407
DG
1475{
1476 if (spapr->htab_fd >= 0) {
1477 close(spapr->htab_fd);
1478 }
1479 spapr->htab_fd = -1;
1480}
1481
e57ca75c
DG
1482static hwaddr spapr_hpt_mask(PPCVirtualHypervisor *vhyp)
1483{
1484 sPAPRMachineState *spapr = SPAPR_MACHINE(vhyp);
1485
1486 return HTAB_SIZE(spapr) / HASH_PTEG_SIZE_64 - 1;
1487}
1488
1ec26c75
GK
1489static target_ulong spapr_encode_hpt_for_kvm_pr(PPCVirtualHypervisor *vhyp)
1490{
1491 sPAPRMachineState *spapr = SPAPR_MACHINE(vhyp);
1492
1493 assert(kvm_enabled());
1494
1495 if (!spapr->htab) {
1496 return 0;
1497 }
1498
1499 return (target_ulong)(uintptr_t)spapr->htab | (spapr->htab_shift - 18);
1500}
1501
e57ca75c
DG
1502static const ppc_hash_pte64_t *spapr_map_hptes(PPCVirtualHypervisor *vhyp,
1503 hwaddr ptex, int n)
1504{
1505 sPAPRMachineState *spapr = SPAPR_MACHINE(vhyp);
1506 hwaddr pte_offset = ptex * HASH_PTE_SIZE_64;
1507
1508 if (!spapr->htab) {
1509 /*
1510 * HTAB is controlled by KVM. Fetch into temporary buffer
1511 */
1512 ppc_hash_pte64_t *hptes = g_malloc(n * HASH_PTE_SIZE_64);
1513 kvmppc_read_hptes(hptes, ptex, n);
1514 return hptes;
1515 }
1516
1517 /*
1518 * HTAB is controlled by QEMU. Just point to the internally
1519 * accessible PTEG.
1520 */
1521 return (const ppc_hash_pte64_t *)(spapr->htab + pte_offset);
1522}
1523
1524static void spapr_unmap_hptes(PPCVirtualHypervisor *vhyp,
1525 const ppc_hash_pte64_t *hptes,
1526 hwaddr ptex, int n)
1527{
1528 sPAPRMachineState *spapr = SPAPR_MACHINE(vhyp);
1529
1530 if (!spapr->htab) {
1531 g_free((void *)hptes);
1532 }
1533
1534 /* Nothing to do for qemu managed HPT */
1535}
1536
/*
 * PPCVirtualHypervisor hook: store one HPTE.  For a QEMU-managed HPT
 * the two words are written in an order that keeps concurrent readers
 * safe — the write order below IS the synchronization contract, do not
 * reorder these stores.
 */
static void spapr_store_hpte(PPCVirtualHypervisor *vhyp, hwaddr ptex,
                             uint64_t pte0, uint64_t pte1)
{
    sPAPRMachineState *spapr = SPAPR_MACHINE(vhyp);
    hwaddr offset = ptex * HASH_PTE_SIZE_64;

    if (!spapr->htab) {
        /* KVM owns the HPT: delegate the update to the kernel */
        kvmppc_write_hpte(ptex, pte0, pte1);
    } else {
        if (pte0 & HPTE64_V_VALID) {
            stq_p(spapr->htab + offset + HASH_PTE_SIZE_64 / 2, pte1);
            /*
             * When setting valid, we write PTE1 first. This ensures
             * proper synchronization with the reading code in
             * ppc_hash64_pteg_search()
             */
            smp_wmb();
            stq_p(spapr->htab + offset, pte0);
        } else {
            stq_p(spapr->htab + offset, pte0);
            /*
             * When clearing it we set PTE0 first. This ensures proper
             * synchronization with the reading code in
             * ppc_hash64_pteg_search()
             */
            smp_wmb();
            stq_p(spapr->htab + offset + HASH_PTE_SIZE_64 / 2, pte1);
        }
    }
}
1567
/* Pick an HPT size (as a power-of-two shift) for @ramsize bytes of RAM */
int spapr_hpt_shift_for_ramsize(uint64_t ramsize)
{
    /* We aim for a hash table of size 1/128 the size of RAM (rounded
     * up). The PAPR recommendation is actually 1/64 of RAM size, but
     * that's much more than is needed for Linux guests */
    int shift = ctz64(pow2ceil(ramsize)) - 7;

    if (shift < 18) {
        shift = 18; /* Minimum architected size */
    }
    if (shift > 46) {
        shift = 46; /* Maximum architected size */
    }
    return shift;
}
1580
06ec79e8
BR
1581void spapr_free_hpt(sPAPRMachineState *spapr)
1582{
1583 g_free(spapr->htab);
1584 spapr->htab = NULL;
1585 spapr->htab_shift = 0;
1586 close_htab_fd(spapr);
1587}
1588
/*
 * (Re)allocate the hash page table at the given shift.  The table ends
 * up either kernel-managed (KVM HV: spapr->htab == NULL) or allocated
 * in QEMU's address space.  Errors are reported through @errp; on
 * failure spapr->htab_shift may be left at 0.
 */
void spapr_reallocate_hpt(sPAPRMachineState *spapr, int shift,
                          Error **errp)
{
    long rc;

    /* Clean up any HPT info from a previous boot */
    spapr_free_hpt(spapr);

    rc = kvmppc_reset_htab(shift);
    if (rc < 0) {
        /* kernel-side HPT needed, but couldn't allocate one */
        error_setg_errno(errp, errno,
                         "Failed to allocate KVM HPT of order %d (try smaller maxmem?)",
                         shift);
        /* This is almost certainly fatal, but if the caller really
         * wants to carry on with shift == 0, it's welcome to try */
    } else if (rc > 0) {
        /* kernel-side HPT allocated */
        if (rc != shift) {
            /* The kernel gave us a different order than requested */
            error_setg(errp,
                       "Requested order %d HPT, but kernel allocated order %ld (try smaller maxmem?)",
                       shift, rc);
        }

        spapr->htab_shift = shift;
        spapr->htab = NULL;
    } else {
        /* kernel-side HPT not needed, allocate in userspace instead */
        size_t size = 1ULL << shift;
        int i;

        /* The HPT must be naturally aligned, hence memalign(size, size) */
        spapr->htab = qemu_memalign(size, size);
        if (!spapr->htab) {
            error_setg_errno(errp, errno,
                             "Could not allocate HPT of order %d", shift);
            return;
        }

        memset(spapr->htab, 0, size);
        spapr->htab_shift = shift;

        /* Mark every entry dirty so a subsequent migration sends them all */
        for (i = 0; i < size / HASH_PTE_SIZE_64; i++) {
            DIRTY_HPTE(HPTE(spapr->htab, i));
        }
    }
    /* We're setting up a hash table, so that means we're not radix */
    spapr_set_all_lpcrs(0, LPCR_HR | LPCR_UPRT);
}
1637
b4db5413
SJS
1638void spapr_setup_hpt_and_vrma(sPAPRMachineState *spapr)
1639{
2772cf6b
DG
1640 int hpt_shift;
1641
1642 if ((spapr->resize_hpt == SPAPR_RESIZE_HPT_DISABLED)
1643 || (spapr->cas_reboot
1644 && !spapr_ovec_test(spapr->ov5_cas, OV5_HPT_RESIZE))) {
1645 hpt_shift = spapr_hpt_shift_for_ramsize(MACHINE(spapr)->maxram_size);
1646 } else {
768a20f3
DG
1647 uint64_t current_ram_size;
1648
1649 current_ram_size = MACHINE(spapr)->ram_size + get_plugged_memory_size();
1650 hpt_shift = spapr_hpt_shift_for_ramsize(current_ram_size);
2772cf6b
DG
1651 }
1652 spapr_reallocate_hpt(spapr, hpt_shift, &error_fatal);
1653
b4db5413 1654 if (spapr->vrma_adjust) {
c86c1aff 1655 spapr->rma_size = kvmppc_rma_size(spapr_node0_size(MACHINE(spapr)),
b4db5413
SJS
1656 spapr->htab_shift);
1657 }
b4db5413
SJS
1658}
1659
82512483
GK
1660static int spapr_reset_drcs(Object *child, void *opaque)
1661{
1662 sPAPRDRConnector *drc =
1663 (sPAPRDRConnector *) object_dynamic_cast(child,
1664 TYPE_SPAPR_DR_CONNECTOR);
1665
1666 if (drc) {
1667 spapr_drc_reset(drc);
1668 }
1669
1670 return 0;
1671}
1672
/*
 * Machine reset: reapply capabilities, choose the MMU setup (radix
 * PATE vs. HPT), reset devices and DRCs, rebuild the device tree and
 * RTAS blob in guest memory, and point the boot CPU at the entry
 * state.  The ordering of the steps below is significant.
 */
static void spapr_machine_reset(void)
{
    MachineState *machine = MACHINE(qdev_get_machine());
    sPAPRMachineState *spapr = SPAPR_MACHINE(machine);
    PowerPCCPU *first_ppc_cpu;
    uint32_t rtas_limit;
    hwaddr rtas_addr, fdt_addr;
    void *fdt;
    int rc;

    spapr_caps_apply(spapr);

    first_ppc_cpu = POWERPC_CPU(first_cpu);
    if (kvm_enabled() && kvmppc_has_cap_mmu_radix() &&
        ppc_type_check_compat(machine->cpu_type, CPU_POWERPC_LOGICAL_3_00, 0,
                              spapr->max_compat_pvr)) {
        /*
         * If using KVM with radix mode available, VCPUs can be started
         * without a HPT because KVM will start them in radix mode.
         * Set the GR bit in PATE so that we know there is no HPT.
         */
        spapr->patb_entry = PATE1_GR;
        spapr_set_all_lpcrs(LPCR_HR | LPCR_UPRT, LPCR_HR | LPCR_UPRT);
    } else {
        spapr_setup_hpt_and_vrma(spapr);
    }

    /*
     * If this reset wasn't generated by CAS, we should reset our
     * negotiated options and start from scratch
     */
    if (!spapr->cas_reboot) {
        spapr_ovec_cleanup(spapr->ov5_cas);
        spapr->ov5_cas = spapr_ovec_new();

        ppc_set_compat(first_ppc_cpu, spapr->max_compat_pvr, &error_fatal);
    }

    if (!SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
        spapr_irq_msi_reset(spapr);
    }

    qemu_devices_reset();

    /*
     * This is fixing some of the default configuration of the XIVE
     * devices. To be called after the reset of the machine devices.
     */
    spapr_irq_reset(spapr, &error_fatal);

    /*
     * There is no CAS under qtest. Simulate one to please the code that
     * depends on spapr->ov5_cas. This is especially needed to test device
     * unplug, so we do that before resetting the DRCs.
     */
    if (qtest_enabled()) {
        spapr_ovec_cleanup(spapr->ov5_cas);
        spapr->ov5_cas = spapr_ovec_clone(spapr->ov5);
    }

    /* DRC reset may cause a device to be unplugged. This will cause troubles
     * if this device is used by another device (eg, a running vhost backend
     * will crash QEMU if the DIMM holding the vring goes away). To avoid such
     * situations, we reset DRCs after all devices have been reset.
     */
    object_child_foreach_recursive(object_get_root(), spapr_reset_drcs, NULL);

    spapr_clear_pending_events(spapr);

    /*
     * We place the device tree and RTAS just below either the top of the RMA,
     * or just below 2GB, whichever is lower, so that it can be
     * processed with 32-bit real mode code if necessary
     */
    rtas_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR);
    rtas_addr = rtas_limit - RTAS_MAX_SIZE;
    fdt_addr = rtas_addr - FDT_MAX_SIZE;

    fdt = spapr_build_fdt(spapr);

    spapr_load_rtas(spapr, fdt, rtas_addr);

    rc = fdt_pack(fdt);

    /* Should only fail if we've built a corrupted tree */
    assert(rc == 0);

    if (fdt_totalsize(fdt) > FDT_MAX_SIZE) {
        error_report("FDT too big ! 0x%x bytes (max is 0x%x)",
                     fdt_totalsize(fdt), FDT_MAX_SIZE);
        exit(1);
    }

    /* Load the fdt */
    qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt));
    cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt));
    /* Keep the blob around (freeing any previous one) so later CAS /
     * migration code can reuse it */
    g_free(spapr->fdt_blob);
    spapr->fdt_size = fdt_totalsize(fdt);
    spapr->fdt_initial_size = spapr->fdt_size;
    spapr->fdt_blob = fdt;

    /* Set up the entry state */
    spapr_cpu_set_entry_state(first_ppc_cpu, SPAPR_ENTRY_POINT, fdt_addr);
    first_ppc_cpu->env.gpr[5] = 0;

    spapr->cas_reboot = false;
}
1780
28e02042 1781static void spapr_create_nvram(sPAPRMachineState *spapr)
639e8102 1782{
2ff3de68 1783 DeviceState *dev = qdev_create(&spapr->vio_bus->bus, "spapr-nvram");
3978b863 1784 DriveInfo *dinfo = drive_get(IF_PFLASH, 0, 0);
639e8102 1785
3978b863 1786 if (dinfo) {
6231a6da
MA
1787 qdev_prop_set_drive(dev, "drive", blk_by_legacy_dinfo(dinfo),
1788 &error_fatal);
639e8102
DG
1789 }
1790
1791 qdev_init_nofail(dev);
1792
1793 spapr->nvram = (struct sPAPRNVRAM *)dev;
1794}
1795
/*
 * Create and realize the embedded PAPR RTC device, and expose its
 * "date" property on the machine object as "rtc-time".  All failures
 * are fatal at machine construction time.
 */
static void spapr_rtc_create(sPAPRMachineState *spapr)
{
    object_initialize_child(OBJECT(spapr), "rtc",
                            &spapr->rtc, sizeof(spapr->rtc), TYPE_SPAPR_RTC,
                            &error_fatal, NULL);
    object_property_set_bool(OBJECT(&spapr->rtc), true, "realized",
                             &error_fatal);
    object_property_add_alias(OBJECT(spapr), "rtc-time", OBJECT(&spapr->rtc),
                              "date", &error_fatal);
}
1806
8c57b867 1807/* Returns whether we want to use VGA or not */
14c6a894 1808static bool spapr_vga_init(PCIBus *pci_bus, Error **errp)
f28359d8 1809{
8c57b867 1810 switch (vga_interface_type) {
8c57b867 1811 case VGA_NONE:
7effdaa3
MW
1812 return false;
1813 case VGA_DEVICE:
1814 return true;
1ddcae82 1815 case VGA_STD:
b798c190 1816 case VGA_VIRTIO:
6e66d0c6 1817 case VGA_CIRRUS:
1ddcae82 1818 return pci_vga_init(pci_bus) != NULL;
8c57b867 1819 default:
14c6a894
DG
1820 error_setg(errp,
1821 "Unsupported VGA mode, only -vga std or -vga virtio is supported");
1822 return false;
f28359d8 1823 }
f28359d8
LZ
1824}
1825
/* VMState pre_load hook: currently only delegates to the caps code */
static int spapr_pre_load(void *opaque)
{
    return spapr_caps_pre_load(opaque);
}
1837
/*
 * VMState post_load hook: fix up state that older stream versions or
 * the destination host's MMU configuration don't carry directly.
 * Returns 0 on success, negative errno on failure.
 */
static int spapr_post_load(void *opaque, int version_id)
{
    sPAPRMachineState *spapr = (sPAPRMachineState *)opaque;
    int err = 0;

    err = spapr_caps_post_migration(spapr);
    if (err) {
        return err;
    }

    /*
     * In earlier versions, there was no separate qdev for the PAPR
     * RTC, so the RTC offset was stored directly in sPAPREnvironment.
     * So when migrating from those versions, poke the incoming offset
     * value into the RTC device
     */
    if (version_id < 3) {
        err = spapr_rtc_import_offset(&spapr->rtc, spapr->rtc_offset);
        if (err) {
            return err;
        }
    }

    /* Non-zero patb_entry means a POWER9-style partition table: push the
     * radix/hash decision down into KVM on the destination */
    if (kvm_enabled() && spapr->patb_entry) {
        PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
        bool radix = !!(spapr->patb_entry & PATE1_GR);
        bool gtse = !!(cpu->env.spr[SPR_LPCR] & LPCR_GTSE);

        /*
         * Update LPCR:HR and UPRT as they may not be set properly in
         * the stream
         */
        spapr_set_all_lpcrs(radix ? (LPCR_HR | LPCR_UPRT) : 0,
                            LPCR_HR | LPCR_UPRT);

        err = kvmppc_configure_v3_mmu(cpu, radix, gtse, spapr->patb_entry);
        if (err) {
            error_report("Process table config unsupported by the host");
            return -EINVAL;
        }
    }

    err = spapr_irq_post_load(spapr, version_id);
    if (err) {
        return err;
    }

    return err;
}
1887
/*
 * vmstate pre_save hook for the "spapr" VMSD: delegates to the
 * spapr-caps code so capability state is snapshotted before saving.
 */
static int spapr_pre_save(void *opaque)
{
    /* 0 on success, negative errno otherwise — same contract as caps hook */
    return spapr_caps_pre_save(opaque);
}
1899
/*
 * VMStateField .field_exists predicate: true for streams produced by
 * "spapr" VMSD versions 1 and 2 (i.e. anything before version 3).
 * The opaque argument is unused.
 */
static bool version_before_3(void *opaque, int version_id)
{
    return version_id <= 2;
}
1904
fd38804b
DHB
1905static bool spapr_pending_events_needed(void *opaque)
1906{
1907 sPAPRMachineState *spapr = (sPAPRMachineState *)opaque;
1908 return !QTAILQ_EMPTY(&spapr->pending_events);
1909}
1910
/* Wire format of one queued RTAS event-log entry: summary word,
 * extended-log length, then the variable-sized extended log blob
 * (allocated on load, sized by extended_length). */
static const VMStateDescription vmstate_spapr_event_entry = {
    .name = "spapr_event_log_entry",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(summary, sPAPREventLogEntry),
        VMSTATE_UINT32(extended_length, sPAPREventLogEntry),
        VMSTATE_VBUFFER_ALLOC_UINT32(extended_log, sPAPREventLogEntry, 0,
                                     NULL, extended_length),
        VMSTATE_END_OF_LIST()
    },
};
1923
/* Optional "spapr" subsection carrying the queue of not-yet-delivered
 * RTAS events; present only when spapr_pending_events_needed() is true. */
static const VMStateDescription vmstate_spapr_pending_events = {
    .name = "spapr_pending_events",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = spapr_pending_events_needed,
    .fields = (VMStateField[]) {
        VMSTATE_QTAILQ_V(pending_events, sPAPRMachineState, 1,
                         vmstate_spapr_event_entry, sPAPREventLogEntry, next),
        VMSTATE_END_OF_LIST()
    },
};
1935
/*
 * Subsection .needed predicate: decide whether the CAS-negotiated
 * option vector 5 state must be migrated.  Returns true iff spapr->ov5
 * contains any bit outside the "legacy"/boot-time-only mask built below.
 */
static bool spapr_ov5_cas_needed(void *opaque)
{
    sPAPRMachineState *spapr = opaque;
    sPAPROptionVector *ov5_mask = spapr_ovec_new();
    sPAPROptionVector *ov5_legacy = spapr_ovec_new();
    sPAPROptionVector *ov5_removed = spapr_ovec_new();
    bool cas_needed;

    /* Prior to the introduction of sPAPROptionVector, we had two option
     * vectors we dealt with: OV5_FORM1_AFFINITY, and OV5_DRCONF_MEMORY.
     * Both of these options encode machine topology into the device-tree
     * in such a way that the now-booted OS should still be able to interact
     * appropriately with QEMU regardless of what options were actually
     * negotiated on the source side.
     *
     * As such, we can avoid migrating the CAS-negotiated options if these
     * are the only options available on the current machine/platform.
     * Since these are the only options available for pseries-2.7 and
     * earlier, this allows us to maintain old->new/new->old migration
     * compatibility.
     *
     * For QEMU 2.8+, there are additional CAS-negotiatable options available
     * via default pseries-2.8 machines and explicit command-line parameters.
     * Some of these options, like OV5_HP_EVT, *do* require QEMU to be aware
     * of the actual CAS-negotiated values to continue working properly. For
     * example, availability of memory unplug depends on knowing whether
     * OV5_HP_EVT was negotiated via CAS.
     *
     * Thus, for any cases where the set of available CAS-negotiatable
     * options extends beyond OV5_FORM1_AFFINITY and OV5_DRCONF_MEMORY, we
     * include the CAS-negotiated options in the migration stream, unless
     * if they affect boot time behaviour only.
     */
    spapr_ovec_set(ov5_mask, OV5_FORM1_AFFINITY);
    spapr_ovec_set(ov5_mask, OV5_DRCONF_MEMORY);
    spapr_ovec_set(ov5_mask, OV5_DRMEM_V2);

    /* spapr_ovec_diff returns true if bits were removed. we avoid using
     * the mask itself since in the future it's possible "legacy" bits may be
     * removed via machine options, which could generate a false positive
     * that breaks migration.
     */
    spapr_ovec_intersect(ov5_legacy, spapr->ov5, ov5_mask);
    cas_needed = spapr_ovec_diff(ov5_removed, spapr->ov5, ov5_legacy);

    /* All three vectors are scratch state; free them before returning */
    spapr_ovec_cleanup(ov5_mask);
    spapr_ovec_cleanup(ov5_legacy);
    spapr_ovec_cleanup(ov5_removed);

    return cas_needed;
}
1987
/* Optional "spapr" subsection carrying the option vector 5 bits the
 * guest negotiated via ibm,client-architecture-support. */
static const VMStateDescription vmstate_spapr_ov5_cas = {
    .name = "spapr_option_vector_ov5_cas",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = spapr_ov5_cas_needed,
    .fields = (VMStateField[]) {
        VMSTATE_STRUCT_POINTER_V(ov5_cas, sPAPRMachineState, 1,
                                 vmstate_spapr_ovec, sPAPROptionVector),
        VMSTATE_END_OF_LIST()
    },
};
1999
9861bb3e
SJS
2000static bool spapr_patb_entry_needed(void *opaque)
2001{
2002 sPAPRMachineState *spapr = opaque;
2003
2004 return !!spapr->patb_entry;
2005}
2006
/* Optional "spapr" subsection: the guest's partition table entry
 * (POWER9 radix/hash-v3 MMU configuration word). */
static const VMStateDescription vmstate_spapr_patb_entry = {
    .name = "spapr_patb_entry",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = spapr_patb_entry_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT64(patb_entry, sPAPRMachineState),
        VMSTATE_END_OF_LIST()
    },
};
2017
82cffa2e
CLG
2018static bool spapr_irq_map_needed(void *opaque)
2019{
2020 sPAPRMachineState *spapr = opaque;
2021
2022 return spapr->irq_map && !bitmap_empty(spapr->irq_map, spapr->irq_map_nr);
2023}
2024
/* Optional "spapr" subsection: bitmap of dynamically allocated IRQ
 * numbers (sized by irq_map_nr on the destination). */
static const VMStateDescription vmstate_spapr_irq_map = {
    .name = "spapr_irq_map",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = spapr_irq_map_needed,
    .fields = (VMStateField[]) {
        VMSTATE_BITMAP(irq_map, sPAPRMachineState, 0, irq_map_nr),
        VMSTATE_END_OF_LIST()
    },
};
2035
fea35ca4
AK
2036static bool spapr_dtb_needed(void *opaque)
2037{
2038 sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(opaque);
2039
2040 return smc->update_dt_enabled;
2041}
2042
2043static int spapr_dtb_pre_load(void *opaque)
2044{
2045 sPAPRMachineState *spapr = (sPAPRMachineState *)opaque;
2046
2047 g_free(spapr->fdt_blob);
2048 spapr->fdt_blob = NULL;
2049 spapr->fdt_size = 0;
2050
2051 return 0;
2052}
2053
/* Optional "spapr" subsection: the flattened device tree blob handed
 * to the guest, plus its original and current sizes. */
static const VMStateDescription vmstate_spapr_dtb = {
    .name = "spapr_dtb",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = spapr_dtb_needed,
    .pre_load = spapr_dtb_pre_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(fdt_initial_size, sPAPRMachineState),
        VMSTATE_UINT32(fdt_size, sPAPRMachineState),
        VMSTATE_VBUFFER_ALLOC_UINT32(fdt_blob, sPAPRMachineState, 0, NULL,
                                     fdt_size),
        VMSTATE_END_OF_LIST()
    },
};
2068
/*
 * Top-level machine VMSD.  Field order and subsection membership are
 * migration ABI: do not reorder or remove entries without a version
 * bump and compatibility handling.
 */
static const VMStateDescription vmstate_spapr = {
    .name = "spapr",
    .version_id = 3,
    .minimum_version_id = 1,
    .pre_load = spapr_pre_load,
    .post_load = spapr_post_load,
    .pre_save = spapr_pre_save,
    .fields = (VMStateField[]) {
        /* used to be @next_irq */
        VMSTATE_UNUSED_BUFFER(version_before_3, 0, 4),

        /* RTC offset */
        VMSTATE_UINT64_TEST(rtc_offset, sPAPRMachineState, version_before_3),

        VMSTATE_PPC_TIMEBASE_V(tb, sPAPRMachineState, 2),
        VMSTATE_END_OF_LIST()
    },
    /* Optional subsections, each guarded by its own .needed predicate */
    .subsections = (const VMStateDescription*[]) {
        &vmstate_spapr_ov5_cas,
        &vmstate_spapr_patb_entry,
        &vmstate_spapr_pending_events,
        &vmstate_spapr_cap_htm,
        &vmstate_spapr_cap_vsx,
        &vmstate_spapr_cap_dfp,
        &vmstate_spapr_cap_cfpc,
        &vmstate_spapr_cap_sbbc,
        &vmstate_spapr_cap_ibs,
        &vmstate_spapr_irq_map,
        &vmstate_spapr_cap_nested_kvm_hv,
        &vmstate_spapr_dtb,
        &vmstate_spapr_cap_large_decr,
        &vmstate_spapr_cap_ccf_assist,
        NULL
    }
};
2104
4be21d56
DG
2105static int htab_save_setup(QEMUFile *f, void *opaque)
2106{
28e02042 2107 sPAPRMachineState *spapr = opaque;
4be21d56 2108
4be21d56 2109 /* "Iteration" header */
3a384297
BR
2110 if (!spapr->htab_shift) {
2111 qemu_put_be32(f, -1);
2112 } else {
2113 qemu_put_be32(f, spapr->htab_shift);
2114 }
4be21d56 2115
e68cb8b4
AK
2116 if (spapr->htab) {
2117 spapr->htab_save_index = 0;
2118 spapr->htab_first_pass = true;
2119 } else {
3a384297
BR
2120 if (spapr->htab_shift) {
2121 assert(kvm_enabled());
2122 }
e68cb8b4
AK
2123 }
2124
2125
4be21d56
DG
2126 return 0;
2127}
2128
/*
 * Emit one HPT chunk record: start slot, count of valid entries, count
 * of (cleared) invalid entries, then the raw bytes of the valid HPTEs.
 * Invalid entries are encoded by count only — no data follows them.
 */
static void htab_save_chunk(QEMUFile *f, sPAPRMachineState *spapr,
                            int chunkstart, int n_valid, int n_invalid)
{
    qemu_put_be32(f, chunkstart);
    qemu_put_be16(f, n_valid);
    qemu_put_be16(f, n_invalid);
    qemu_put_buffer(f, HPTE(spapr->htab, chunkstart),
                    HASH_PTE_SIZE_64 * n_valid);
}
2138
/* Terminate an HPT section: a chunk header of all zeros (index 0,
 * 0 valid, 0 invalid) is the end-of-stream marker htab_load checks for. */
static void htab_save_end_marker(QEMUFile *f)
{
    qemu_put_be32(f, 0);
    qemu_put_be16(f, 0);
    qemu_put_be16(f, 0);
}
2145
/*
 * First pass over a QEMU-managed HPT: send every currently-valid entry.
 *
 * Scans from spapr->htab_save_index, clearing dirty bits as it goes,
 * and emits runs of valid HPTEs as chunks.  Stops early when max_ns
 * has elapsed (max_ns == -1 means no timeout, i.e. the final flush) or
 * when the migration rate limit is hit; progress is kept in
 * spapr->htab_save_index so the next call resumes where this left off.
 */
static void htab_save_first_pass(QEMUFile *f, sPAPRMachineState *spapr,
                                 int64_t max_ns)
{
    bool has_timeout = max_ns != -1;
    int htabslots = HTAB_SIZE(spapr) / HASH_PTE_SIZE_64;
    int index = spapr->htab_save_index;
    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);

    assert(spapr->htab_first_pass);

    do {
        int chunkstart;

        /* Consume invalid HPTEs (nothing to send; just clear dirty) */
        while ((index < htabslots)
               && !HPTE_VALID(HPTE(spapr->htab, index))) {
            CLEAN_HPTE(HPTE(spapr->htab, index));
            index++;
        }

        /* Consume valid HPTEs (run length capped so it fits a be16 count) */
        chunkstart = index;
        while ((index < htabslots) && (index - chunkstart < USHRT_MAX)
               && HPTE_VALID(HPTE(spapr->htab, index))) {
            CLEAN_HPTE(HPTE(spapr->htab, index));
            index++;
        }

        if (index > chunkstart) {
            int n_valid = index - chunkstart;

            htab_save_chunk(f, spapr, chunkstart, n_valid, 0);

            /* Time-box each iterate call so the main loop stays responsive */
            if (has_timeout &&
                (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) > max_ns) {
                break;
            }
        }
    } while ((index < htabslots) && !qemu_file_rate_limit(f));

    if (index >= htabslots) {
        /* Whole table scanned once: switch to dirty-tracking passes */
        assert(index == htabslots);
        index = 0;
        spapr->htab_first_pass = false;
    }
    spapr->htab_save_index = index;
}
2193
/*
 * Later passes over a QEMU-managed HPT: send only entries dirtied since
 * the previous pass.
 *
 * Each chunk is a run of dirty-valid entries followed by a run of
 * dirty-invalid entries (sent as a count only).  max_ns < 0 marks the
 * final, untimed, rate-limit-ignoring pass.  Returns 1 once a full
 * sweep found nothing left to send (migration of the HPT is complete),
 * 0 otherwise.
 */
static int htab_save_later_pass(QEMUFile *f, sPAPRMachineState *spapr,
                                int64_t max_ns)
{
    bool final = max_ns < 0;
    int htabslots = HTAB_SIZE(spapr) / HASH_PTE_SIZE_64;
    int examined = 0, sent = 0;
    int index = spapr->htab_save_index;
    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);

    assert(!spapr->htab_first_pass);

    do {
        int chunkstart, invalidstart;

        /* Consume non-dirty HPTEs (unchanged since last pass: skip) */
        while ((index < htabslots)
               && !HPTE_DIRTY(HPTE(spapr->htab, index))) {
            index++;
            examined++;
        }

        chunkstart = index;
        /* Consume valid dirty HPTEs (run capped to fit the be16 count) */
        while ((index < htabslots) && (index - chunkstart < USHRT_MAX)
               && HPTE_DIRTY(HPTE(spapr->htab, index))
               && HPTE_VALID(HPTE(spapr->htab, index))) {
            CLEAN_HPTE(HPTE(spapr->htab, index));
            index++;
            examined++;
        }

        invalidstart = index;
        /* Consume invalid dirty HPTEs (invalidated entries: count only) */
        while ((index < htabslots) && (index - invalidstart < USHRT_MAX)
               && HPTE_DIRTY(HPTE(spapr->htab, index))
               && !HPTE_VALID(HPTE(spapr->htab, index))) {
            CLEAN_HPTE(HPTE(spapr->htab, index));
            index++;
            examined++;
        }

        if (index > chunkstart) {
            int n_valid = invalidstart - chunkstart;
            int n_invalid = index - invalidstart;

            htab_save_chunk(f, spapr, chunkstart, n_valid, n_invalid);
            sent += index - chunkstart;

            /* Time-box non-final passes */
            if (!final && (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) > max_ns) {
                break;
            }
        }

        if (examined >= htabslots) {
            break;
        }

        /* Wrap around so a pass can start mid-table and still cover it all */
        if (index >= htabslots) {
            assert(index == htabslots);
            index = 0;
        }
    } while ((examined < htabslots) && (!qemu_file_rate_limit(f) || final));

    if (index >= htabslots) {
        assert(index == htabslots);
        index = 0;
    }

    spapr->htab_save_index = index;

    /* 1 == clean full sweep with nothing sent: HPT is fully migrated */
    return (examined >= htabslots) && (sent == 0) ? 1 : 0;
}
2266
/* Per-call budget for HPT iteration, and chunk size for the KVM path */
#define MAX_ITERATION_NS 5000000 /* 5 ms */
#define MAX_KVM_BUF_SIZE 2048

/*
 * SaveVMHandlers .save_live_iterate for the HPT.
 *
 * Writes one iteration section: a small header, then either a batch of
 * KVM-provided HPT data (KVM-managed table) or a timed pass over the
 * QEMU-managed table, then the end marker.  Returns 1 when there is no
 * HPT (nothing more to do), a negative errno on failure, otherwise the
 * later-pass completion status (1 = done, 0 = more to send).
 */
static int htab_save_iterate(QEMUFile *f, void *opaque)
{
    sPAPRMachineState *spapr = opaque;
    int fd;
    int rc = 0;

    /* Iteration header: -1 means no HPT, and we are immediately done */
    if (!spapr->htab_shift) {
        qemu_put_be32(f, -1);
        return 1;
    } else {
        qemu_put_be32(f, 0);
    }

    if (!spapr->htab) {
        assert(kvm_enabled());

        /* KVM owns the table: stream chunks straight from the kernel fd */
        fd = get_htab_fd(spapr);
        if (fd < 0) {
            return fd;
        }

        rc = kvmppc_save_htab(f, fd, MAX_KVM_BUF_SIZE, MAX_ITERATION_NS);
        if (rc < 0) {
            return rc;
        }
    } else if (spapr->htab_first_pass) {
        htab_save_first_pass(f, spapr, MAX_ITERATION_NS);
    } else {
        rc = htab_save_later_pass(f, spapr, MAX_ITERATION_NS);
    }

    htab_save_end_marker(f);

    return rc;
}
2306
/*
 * SaveVMHandlers .save_live_complete_precopy for the HPT: final,
 * untimed flush of everything still unsent (max_ns == -1 disables both
 * the timeout and the rate limit in the pass functions).
 */
static int htab_save_complete(QEMUFile *f, void *opaque)
{
    sPAPRMachineState *spapr = opaque;
    int fd;

    /* Iteration header: -1 means no HPT, nothing to flush */
    if (!spapr->htab_shift) {
        qemu_put_be32(f, -1);
        return 0;
    } else {
        qemu_put_be32(f, 0);
    }

    if (!spapr->htab) {
        int rc;

        assert(kvm_enabled());

        /* KVM-managed table: drain the kernel fd completely (-1 = no limit) */
        fd = get_htab_fd(spapr);
        if (fd < 0) {
            return fd;
        }

        rc = kvmppc_save_htab(f, fd, MAX_KVM_BUF_SIZE, -1);
        if (rc < 0) {
            return rc;
        }
    } else {
        /* Finish the first pass if it never completed, then flush dirty */
        if (spapr->htab_first_pass) {
            htab_save_first_pass(f, spapr, -1);
        }
        htab_save_later_pass(f, spapr, -1);
    }

    /* End marker */
    htab_save_end_marker(f);

    return 0;
}
2346
/*
 * SaveVMHandlers .load_state for the HPT stream.
 *
 * The first section word is the htab shift: -1 frees any HPT, non-zero
 * (re)allocates one of that size, and 0 introduces chunk records which
 * are consumed until the all-zero end marker.  Chunks go either into
 * QEMU's table directly or through the KVM htab fd.
 * Returns 0 on success, negative errno on a malformed stream.
 */
static int htab_load(QEMUFile *f, void *opaque, int version_id)
{
    sPAPRMachineState *spapr = opaque;
    uint32_t section_hdr;
    int fd = -1;
    Error *local_err = NULL;

    if (version_id < 1 || version_id > 1) {
        error_report("htab_load() bad version");
        return -EINVAL;
    }

    section_hdr = qemu_get_be32(f);

    if (section_hdr == -1) {
        /* Source has no HPT (e.g. radix guest): drop ours too */
        spapr_free_hpt(spapr);
        return 0;
    }

    if (section_hdr) {
        /* First section gives the htab size */
        spapr_reallocate_hpt(spapr, section_hdr, &local_err);
        if (local_err) {
            error_report_err(local_err);
            return -EINVAL;
        }
        return 0;
    }

    if (!spapr->htab) {
        assert(kvm_enabled());

        /* KVM owns the destination table: open its write fd once */
        fd = kvmppc_get_htab_fd(true, 0, &local_err);
        if (fd < 0) {
            error_report_err(local_err);
            return fd;
        }
    }

    while (true) {
        uint32_t index;
        uint16_t n_valid, n_invalid;

        index = qemu_get_be32(f);
        n_valid = qemu_get_be16(f);
        n_invalid = qemu_get_be16(f);

        if ((index == 0) && (n_valid == 0) && (n_invalid == 0)) {
            /* End of Stream */
            break;
        }

        if ((index + n_valid + n_invalid) >
            (HTAB_SIZE(spapr) / HASH_PTE_SIZE_64)) {
            /* Bad index in stream */
            /* NOTE(review): the KVM fd opened above is not closed on this
             * error path (nor on the chunk-load failure below) — confirm
             * whether the caller tears the fd down on failed migration. */
            error_report(
                "htab_load() bad index %d (%hd+%hd entries) in htab stream (htab_shift=%d)",
                index, n_valid, n_invalid, spapr->htab_shift);
            return -EINVAL;
        }

        if (spapr->htab) {
            /* QEMU-managed table: copy valid entries, zero invalid ones */
            if (n_valid) {
                qemu_get_buffer(f, HPTE(spapr->htab, index),
                                HASH_PTE_SIZE_64 * n_valid);
            }
            if (n_invalid) {
                memset(HPTE(spapr->htab, index + n_valid), 0,
                       HASH_PTE_SIZE_64 * n_invalid);
            }
        } else {
            int rc;

            assert(fd >= 0);

            rc = kvmppc_load_htab_chunk(f, fd, index, n_valid, n_invalid);
            if (rc < 0) {
                return rc;
            }
        }
    }

    if (!spapr->htab) {
        assert(fd >= 0);
        close(fd);
    }

    return 0;
}
2436
/*
 * SaveVMHandlers .save_cleanup for the HPT stream: release the cached
 * KVM htab fd, if any.
 */
static void htab_save_cleanup(void *opaque)
{
    close_htab_fd(opaque);
}
2443
/* Live-migration handler table for the hash page table ("spapr/htab"). */
static SaveVMHandlers savevm_htab_handlers = {
    .save_setup = htab_save_setup,
    .save_live_iterate = htab_save_iterate,
    .save_live_complete_precopy = htab_save_complete,
    .save_cleanup = htab_save_cleanup,
    .load_state = htab_load,
};
2451
5b2128d2
AG
2452static void spapr_boot_set(void *opaque, const char *boot_device,
2453 Error **errp)
2454{
c86c1aff 2455 MachineState *machine = MACHINE(opaque);
5b2128d2
AG
2456 machine->boot_order = g_strdup(boot_device);
2457}
2458
224245bf
DG
2459static void spapr_create_lmb_dr_connectors(sPAPRMachineState *spapr)
2460{
2461 MachineState *machine = MACHINE(spapr);
2462 uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE;
e8f986fc 2463 uint32_t nr_lmbs = (machine->maxram_size - machine->ram_size)/lmb_size;
224245bf
DG
2464 int i;
2465
2466 for (i = 0; i < nr_lmbs; i++) {
224245bf
DG
2467 uint64_t addr;
2468
b0c14ec4 2469 addr = i * lmb_size + machine->device_memory->base;
6caf3ac6
DG
2470 spapr_dr_connector_new(OBJECT(spapr), TYPE_SPAPR_DRC_LMB,
2471 addr / lmb_size);
224245bf
DG
2472 }
2473}
2474
2475/*
2476 * If RAM size, maxmem size and individual node mem sizes aren't aligned
2477 * to SPAPR_MEMORY_BLOCK_SIZE(256MB), then refuse to start the guest
2478 * since we can't support such unaligned sizes with DRCONF_MEMORY.
2479 */
7c150d6f 2480static void spapr_validate_node_memory(MachineState *machine, Error **errp)
224245bf
DG
2481{
2482 int i;
2483
7c150d6f
DG
2484 if (machine->ram_size % SPAPR_MEMORY_BLOCK_SIZE) {
2485 error_setg(errp, "Memory size 0x" RAM_ADDR_FMT
ab3dd749 2486 " is not aligned to %" PRIu64 " MiB",
7c150d6f 2487 machine->ram_size,
d23b6caa 2488 SPAPR_MEMORY_BLOCK_SIZE / MiB);
7c150d6f
DG
2489 return;
2490 }
2491
2492 if (machine->maxram_size % SPAPR_MEMORY_BLOCK_SIZE) {
2493 error_setg(errp, "Maximum memory size 0x" RAM_ADDR_FMT
ab3dd749 2494 " is not aligned to %" PRIu64 " MiB",
7c150d6f 2495 machine->ram_size,
d23b6caa 2496 SPAPR_MEMORY_BLOCK_SIZE / MiB);
7c150d6f 2497 return;
224245bf
DG
2498 }
2499
2500 for (i = 0; i < nb_numa_nodes; i++) {
2501 if (numa_info[i].node_mem % SPAPR_MEMORY_BLOCK_SIZE) {
7c150d6f
DG
2502 error_setg(errp,
2503 "Node %d memory size 0x%" PRIx64
ab3dd749 2504 " is not aligned to %" PRIu64 " MiB",
7c150d6f 2505 i, numa_info[i].node_mem,
d23b6caa 2506 SPAPR_MEMORY_BLOCK_SIZE / MiB);
7c150d6f 2507 return;
224245bf
DG
2508 }
2509 }
2510}
2511
535455fd
IM
2512/* find cpu slot in machine->possible_cpus by core_id */
2513static CPUArchId *spapr_find_cpu_slot(MachineState *ms, uint32_t id, int *idx)
2514{
2515 int index = id / smp_threads;
2516
2517 if (index >= ms->possible_cpus->len) {
2518 return NULL;
2519 }
2520 if (idx) {
2521 *idx = index;
2522 }
2523 return &ms->possible_cpus->cpus[index];
2524}
2525
/*
 * Validate smp_threads and settle on a VSMT (virtual SMT) mode, either
 * the user-specified spapr->vsmt or a migration-stable default, then
 * push it to KVM if the host mode differs.  Errors are reported via
 * errp; a KVM failure may be downgraded to a warning when the existing
 * host mode still accommodates the requested threads/core.
 */
static void spapr_set_vsmt_mode(sPAPRMachineState *spapr, Error **errp)
{
    Error *local_err = NULL;
    bool vsmt_user = !!spapr->vsmt;  /* non-zero means set on command line */
    int kvm_smt = kvmppc_smt_threads();
    int ret;

    if (!kvm_enabled() && (smp_threads > 1)) {
        error_setg(&local_err, "TCG cannot support more than 1 thread/core "
                   "on a pseries machine");
        goto out;
    }
    if (!is_power_of_2(smp_threads)) {
        error_setg(&local_err, "Cannot support %d threads/core on a pseries "
                   "machine because it must be a power of 2", smp_threads);
        goto out;
    }

    /* Determine the VSMT mode to use: */
    if (vsmt_user) {
        if (spapr->vsmt < smp_threads) {
            error_setg(&local_err, "Cannot support VSMT mode %d"
                       " because it must be >= threads/core (%d)",
                       spapr->vsmt, smp_threads);
            goto out;
        }
        /* In this case, spapr->vsmt has been set by the command line */
    } else {
        /*
         * Default VSMT value is tricky, because we need it to be as
         * consistent as possible (for migration), but this requires
         * changing it for at least some existing cases. We pick 8 as
         * the value that we'd get with KVM on POWER8, the
         * overwhelmingly common case in production systems.
         */
        spapr->vsmt = MAX(8, smp_threads);
    }

    /* KVM: If necessary, set the SMT mode: */
    if (kvm_enabled() && (spapr->vsmt != kvm_smt)) {
        ret = kvmppc_set_smt_threads(spapr->vsmt);
        if (ret) {
            /* Looks like KVM isn't able to change VSMT mode */
            error_setg(&local_err,
                       "Failed to set KVM's VSMT mode to %d (errno %d)",
                       spapr->vsmt, ret);
            /* We can live with that if the default one is big enough
             * for the number of threads, and a submultiple of the one
             * we want. In this case we'll waste some vcpu ids, but
             * behaviour will be correct */
            if ((kvm_smt >= smp_threads) && ((spapr->vsmt % kvm_smt) == 0)) {
                warn_report_err(local_err);
                local_err = NULL;
                goto out;
            } else {
                if (!vsmt_user) {
                    error_append_hint(&local_err,
                                      "On PPC, a VM with %d threads/core"
                                      " on a host with %d threads/core"
                                      " requires the use of VSMT mode %d.\n",
                                      smp_threads, kvm_smt, spapr->vsmt);
                }
                kvmppc_hint_smt_possible(&local_err);
                goto out;
            }
        }
    }
    /* else TCG: nothing to do currently */
out:
    error_propagate(errp, local_err);
}
2597
/*
 * Create the boot-time CPU cores and, when CPU hotplug is supported,
 * one DR connector per possible core.  Validates that smp_cpus and
 * max_cpus are multiples of smp_threads where hotplug is available,
 * and exits on violation (boot-time only).
 */
static void spapr_init_cpus(sPAPRMachineState *spapr)
{
    MachineState *machine = MACHINE(spapr);
    MachineClass *mc = MACHINE_GET_CLASS(machine);
    sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine);
    const char *type = spapr_get_cpu_core_type(machine->cpu_type);
    const CPUArchIdList *possible_cpus;
    int boot_cores_nr = smp_cpus / smp_threads;
    int i;

    possible_cpus = mc->possible_cpu_arch_ids(machine);
    if (mc->has_hotpluggable_cpus) {
        if (smp_cpus % smp_threads) {
            error_report("smp_cpus (%u) must be multiple of threads (%u)",
                         smp_cpus, smp_threads);
            exit(1);
        }
        if (max_cpus % smp_threads) {
            error_report("max_cpus (%u) must be multiple of threads (%u)",
                         max_cpus, smp_threads);
            exit(1);
        }
    } else {
        /* No hotplug: every possible core must be a boot core */
        if (max_cpus != smp_cpus) {
            error_report("This machine version does not support CPU hotplug");
            exit(1);
        }
        boot_cores_nr = possible_cpus->len;
    }

    /* Old machine types expect an ICP per possible vCPU from the start */
    if (smc->pre_2_10_has_unused_icps) {
        int i;

        for (i = 0; i < spapr_max_server_number(spapr); i++) {
            /* Dummy entries get deregistered when real ICPState objects
             * are registered during CPU core hotplug.
             */
            pre_2_10_vmstate_register_dummy_icp(i);
        }
    }

    for (i = 0; i < possible_cpus->len; i++) {
        int core_id = i * smp_threads;

        if (mc->has_hotpluggable_cpus) {
            spapr_dr_connector_new(OBJECT(spapr), TYPE_SPAPR_DRC_CPU,
                                   spapr_vcpu_id(spapr, core_id));
        }

        if (i < boot_cores_nr) {
            Object *core = object_new(type);
            int nr_threads = smp_threads;

            /* Handle the partially filled core for older machine types */
            if ((i + 1) * smp_threads >= smp_cpus) {
                nr_threads = smp_cpus - i * smp_threads;
            }

            object_property_set_int(core, nr_threads, "nr-threads",
                                    &error_fatal);
            object_property_set_int(core, core_id, CPU_CORE_PROP_CORE_ID,
                                    &error_fatal);
            object_property_set_bool(core, true, "realized", &error_fatal);

            /* realize took its own reference; drop ours */
            object_unref(core);
        }
    }
}
2666
999c9caf
GK
2667static PCIHostState *spapr_create_default_phb(void)
2668{
2669 DeviceState *dev;
2670
2671 dev = qdev_create(NULL, TYPE_SPAPR_PCI_HOST_BRIDGE);
2672 qdev_prop_set_uint32(dev, "index", 0);
2673 qdev_init_nofail(dev);
2674
2675 return PCI_HOST_BRIDGE(dev);
2676}
2677
9fdf0c29 2678/* pSeries LPAR / sPAPR hardware init */
bcb5ce08 2679static void spapr_machine_init(MachineState *machine)
9fdf0c29 2680{
28e02042 2681 sPAPRMachineState *spapr = SPAPR_MACHINE(machine);
224245bf 2682 sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine);
3ef96221 2683 const char *kernel_filename = machine->kernel_filename;
3ef96221 2684 const char *initrd_filename = machine->initrd_filename;
8c9f64df 2685 PCIHostState *phb;
9fdf0c29 2686 int i;
890c2b77
AK
2687 MemoryRegion *sysmem = get_system_memory();
2688 MemoryRegion *ram = g_new(MemoryRegion, 1);
c86c1aff 2689 hwaddr node0_size = spapr_node0_size(machine);
b7d1f77a 2690 long load_limit, fw_size;
39ac8455 2691 char *filename;
30f4b05b 2692 Error *resize_hpt_err = NULL;
9fdf0c29 2693
226419d6 2694 msi_nonbroken = true;
0ee2c058 2695
d43b45e2 2696 QLIST_INIT(&spapr->phbs);
0cffce56 2697 QTAILQ_INIT(&spapr->pending_dimm_unplugs);
d43b45e2 2698
9f6edd06
DG
2699 /* Determine capabilities to run with */
2700 spapr_caps_init(spapr);
2701
30f4b05b
DG
2702 kvmppc_check_papr_resize_hpt(&resize_hpt_err);
2703 if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DEFAULT) {
2704 /*
2705 * If the user explicitly requested a mode we should either
2706 * supply it, or fail completely (which we do below). But if
2707 * it's not set explicitly, we reset our mode to something
2708 * that works
2709 */
2710 if (resize_hpt_err) {
2711 spapr->resize_hpt = SPAPR_RESIZE_HPT_DISABLED;
2712 error_free(resize_hpt_err);
2713 resize_hpt_err = NULL;
2714 } else {
2715 spapr->resize_hpt = smc->resize_hpt_default;
2716 }
2717 }
2718
2719 assert(spapr->resize_hpt != SPAPR_RESIZE_HPT_DEFAULT);
2720
2721 if ((spapr->resize_hpt != SPAPR_RESIZE_HPT_DISABLED) && resize_hpt_err) {
2722 /*
2723 * User requested HPT resize, but this host can't supply it. Bail out
2724 */
2725 error_report_err(resize_hpt_err);
2726 exit(1);
2727 }
2728
090052aa 2729 spapr->rma_size = node0_size;
354ac20a 2730
090052aa
DG
2731 /* With KVM, we don't actually know whether KVM supports an
2732 * unbounded RMA (PR KVM) or is limited by the hash table size
2733 * (HV KVM using VRMA), so we always assume the latter
2734 *
2735 * In that case, we also limit the initial allocations for RTAS
2736 * etc... to 256M since we have no way to know what the VRMA size
2737 * is going to be as it depends on the size of the hash table
2738 * which isn't determined yet.
2739 */
2740 if (kvm_enabled()) {
2741 spapr->vrma_adjust = 1;
2742 spapr->rma_size = MIN(spapr->rma_size, 0x10000000);
354ac20a 2743 }
7f763a5d 2744
090052aa
DG
2745 /* Actually we don't support unbounded RMA anymore since we added
2746 * proper emulation of HV mode. The max we can get is 16G which
2747 * also happens to be what we configure for PAPR mode so make sure
2748 * we don't do anything bigger than that
2749 */
2750 spapr->rma_size = MIN(spapr->rma_size, 0x400000000ull);
354ac20a 2751
c4177479 2752 if (spapr->rma_size > node0_size) {
d54e4d76
DG
2753 error_report("Numa node 0 has to span the RMA (%#08"HWADDR_PRIx")",
2754 spapr->rma_size);
c4177479
AK
2755 exit(1);
2756 }
2757
b7d1f77a
BH
2758 /* Setup a load limit for the ramdisk leaving room for SLOF and FDT */
2759 load_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR) - FW_OVERHEAD;
9fdf0c29 2760
482969d6
CLG
2761 /*
2762 * VSMT must be set in order to be able to compute VCPU ids, ie to
1a518e76 2763 * call spapr_max_server_number() or spapr_vcpu_id().
482969d6
CLG
2764 */
2765 spapr_set_vsmt_mode(spapr, &error_fatal);
2766
7b565160 2767 /* Set up Interrupt Controller before we create the VCPUs */
fab397d8 2768 spapr_irq_init(spapr, &error_fatal);
7b565160 2769
dc1b5eee
GK
2770 /* Set up containers for ibm,client-architecture-support negotiated options
2771 */
facdb8b6
MR
2772 spapr->ov5 = spapr_ovec_new();
2773 spapr->ov5_cas = spapr_ovec_new();
2774
224245bf 2775 if (smc->dr_lmb_enabled) {
facdb8b6 2776 spapr_ovec_set(spapr->ov5, OV5_DRCONF_MEMORY);
7c150d6f 2777 spapr_validate_node_memory(machine, &error_fatal);
224245bf
DG
2778 }
2779
417ece33
MR
2780 spapr_ovec_set(spapr->ov5, OV5_FORM1_AFFINITY);
2781
ffbb1705
MR
2782 /* advertise support for dedicated HP event source to guests */
2783 if (spapr->use_hotplug_event_source) {
2784 spapr_ovec_set(spapr->ov5, OV5_HP_EVT);
2785 }
2786
2772cf6b
DG
2787 /* advertise support for HPT resizing */
2788 if (spapr->resize_hpt != SPAPR_RESIZE_HPT_DISABLED) {
2789 spapr_ovec_set(spapr->ov5, OV5_HPT_RESIZE);
2790 }
2791
a324d6f1
BR
2792 /* advertise support for ibm,dyamic-memory-v2 */
2793 spapr_ovec_set(spapr->ov5, OV5_DRMEM_V2);
2794
db592b5b 2795 /* advertise XIVE on POWER9 machines */
13db0cd9 2796 if (spapr->irq->ov5 & (SPAPR_OV5_XIVE_EXPLOIT | SPAPR_OV5_XIVE_BOTH)) {
db592b5b
CLG
2797 if (ppc_type_check_compat(machine->cpu_type, CPU_POWERPC_LOGICAL_3_00,
2798 0, spapr->max_compat_pvr)) {
2799 spapr_ovec_set(spapr->ov5, OV5_XIVE_EXPLOIT);
13db0cd9 2800 } else if (spapr->irq->ov5 & SPAPR_OV5_XIVE_EXPLOIT) {
db592b5b
CLG
2801 error_report("XIVE-only machines require a POWER9 CPU");
2802 exit(1);
2803 }
2804 }
2805
9fdf0c29 2806 /* init CPUs */
0c86d0fd 2807 spapr_init_cpus(spapr);
9fdf0c29 2808
0550b120 2809 if ((!kvm_enabled() || kvmppc_has_cap_mmu_radix()) &&
ad99d04c
DG
2810 ppc_type_check_compat(machine->cpu_type, CPU_POWERPC_LOGICAL_3_00, 0,
2811 spapr->max_compat_pvr)) {
0550b120
GK
2812 /* KVM and TCG always allow GTSE with radix... */
2813 spapr_ovec_set(spapr->ov5, OV5_MMU_RADIX_GTSE);
2814 }
2815 /* ... but not with hash (currently). */
2816
026bfd89
DG
2817 if (kvm_enabled()) {
2818 /* Enable H_LOGICAL_CI_* so SLOF can talk to in-kernel devices */
2819 kvmppc_enable_logical_ci_hcalls();
ef9971dd 2820 kvmppc_enable_set_mode_hcall();
5145ad4f
NW
2821
2822 /* H_CLEAR_MOD/_REF are mandatory in PAPR, but off by default */
2823 kvmppc_enable_clear_ref_mod_hcalls();
026bfd89
DG
2824 }
2825
9fdf0c29 2826 /* allocate RAM */
f92f5da1 2827 memory_region_allocate_system_memory(ram, NULL, "ppc_spapr.ram",
fb164994 2828 machine->ram_size);
f92f5da1 2829 memory_region_add_subregion(sysmem, 0, ram);
9fdf0c29 2830
b0c14ec4
DH
2831 /* always allocate the device memory information */
2832 machine->device_memory = g_malloc0(sizeof(*machine->device_memory));
2833
4a1c9cf0
BR
2834 /* initialize hotplug memory address space */
2835 if (machine->ram_size < machine->maxram_size) {
0c9269a5 2836 ram_addr_t device_mem_size = machine->maxram_size - machine->ram_size;
71c9a3dd
BR
2837 /*
2838 * Limit the number of hotpluggable memory slots to half the number
2839 * slots that KVM supports, leaving the other half for PCI and other
2840 * devices. However ensure that number of slots doesn't drop below 32.
2841 */
2842 int max_memslots = kvm_enabled() ? kvm_get_max_memslots() / 2 :
2843 SPAPR_MAX_RAM_SLOTS;
4a1c9cf0 2844
71c9a3dd
BR
2845 if (max_memslots < SPAPR_MAX_RAM_SLOTS) {
2846 max_memslots = SPAPR_MAX_RAM_SLOTS;
2847 }
2848 if (machine->ram_slots > max_memslots) {
d54e4d76
DG
2849 error_report("Specified number of memory slots %"
2850 PRIu64" exceeds max supported %d",
71c9a3dd 2851 machine->ram_slots, max_memslots);
d54e4d76 2852 exit(1);
4a1c9cf0
BR
2853 }
2854
b0c14ec4 2855 machine->device_memory->base = ROUND_UP(machine->ram_size,
0c9269a5 2856 SPAPR_DEVICE_MEM_ALIGN);
b0c14ec4 2857 memory_region_init(&machine->device_memory->mr, OBJECT(spapr),
0c9269a5 2858 "device-memory", device_mem_size);
b0c14ec4
DH
2859 memory_region_add_subregion(sysmem, machine->device_memory->base,
2860 &machine->device_memory->mr);
4a1c9cf0
BR
2861 }
2862
224245bf
DG
2863 if (smc->dr_lmb_enabled) {
2864 spapr_create_lmb_dr_connectors(spapr);
2865 }
2866
39ac8455 2867 filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, "spapr-rtas.bin");
4c56440d 2868 if (!filename) {
730fce59 2869 error_report("Could not find LPAR rtas '%s'", "spapr-rtas.bin");
4c56440d
SW
2870 exit(1);
2871 }
b7d1f77a 2872 spapr->rtas_size = get_image_size(filename);
8afc22a2
ZJ
2873 if (spapr->rtas_size < 0) {
2874 error_report("Could not get size of LPAR rtas '%s'", filename);
2875 exit(1);
2876 }
b7d1f77a
BH
2877 spapr->rtas_blob = g_malloc(spapr->rtas_size);
2878 if (load_image_size(filename, spapr->rtas_blob, spapr->rtas_size) < 0) {
730fce59 2879 error_report("Could not load LPAR rtas '%s'", filename);
39ac8455
DG
2880 exit(1);
2881 }
4d8d5467 2882 if (spapr->rtas_size > RTAS_MAX_SIZE) {
730fce59
TH
2883 error_report("RTAS too big ! 0x%zx bytes (max is 0x%x)",
2884 (size_t)spapr->rtas_size, RTAS_MAX_SIZE);
4d8d5467
BH
2885 exit(1);
2886 }
7267c094 2887 g_free(filename);
39ac8455 2888
ffbb1705 2889 /* Set up RTAS event infrastructure */
74d042e5
DG
2890 spapr_events_init(spapr);
2891
12f42174 2892 /* Set up the RTC RTAS interfaces */
28df36a1 2893 spapr_rtc_create(spapr);
12f42174 2894
b5cec4c5 2895 /* Set up VIO bus */
4040ab72
DG
2896 spapr->vio_bus = spapr_vio_bus_init();
2897
b8846a4d 2898 for (i = 0; i < serial_max_hds(); i++) {
9bca0edb
PM
2899 if (serial_hd(i)) {
2900 spapr_vty_create(spapr->vio_bus, serial_hd(i));
4040ab72
DG
2901 }
2902 }
9fdf0c29 2903
639e8102
DG
2904 /* We always have at least the nvram device on VIO */
2905 spapr_create_nvram(spapr);
2906
962b6c36
MR
2907 /*
2908 * Setup hotplug / dynamic-reconfiguration connectors. top-level
2909 * connectors (described in root DT node's "ibm,drc-types" property)
2910 * are pre-initialized here. additional child connectors (such as
2911 * connectors for a PHBs PCI slots) are added as needed during their
2912 * parent's realization.
2913 */
2914 if (smc->dr_phb_enabled) {
2915 for (i = 0; i < SPAPR_MAX_PHBS; i++) {
2916 spapr_dr_connector_new(OBJECT(machine), TYPE_SPAPR_DRC_PHB, i);
2917 }
2918 }
2919
3384f95c 2920 /* Set up PCI */
fa28f71b
AK
2921 spapr_pci_rtas_init();
2922
999c9caf 2923 phb = spapr_create_default_phb();
3384f95c 2924
277f9acf 2925 for (i = 0; i < nb_nics; i++) {
8d90ad90
DG
2926 NICInfo *nd = &nd_table[i];
2927
2928 if (!nd->model) {
3c3a4e7a 2929 nd->model = g_strdup("spapr-vlan");
8d90ad90
DG
2930 }
2931
3c3a4e7a
TH
2932 if (g_str_equal(nd->model, "spapr-vlan") ||
2933 g_str_equal(nd->model, "ibmveth")) {
d601fac4 2934 spapr_vlan_create(spapr->vio_bus, nd);
8d90ad90 2935 } else {
29b358f9 2936 pci_nic_init_nofail(&nd_table[i], phb->bus, nd->model, NULL);
8d90ad90
DG
2937 }
2938 }
2939
6e270446 2940 for (i = 0; i <= drive_get_max_bus(IF_SCSI); i++) {
d601fac4 2941 spapr_vscsi_create(spapr->vio_bus);
6e270446
BH
2942 }
2943
f28359d8 2944 /* Graphics */
14c6a894 2945 if (spapr_vga_init(phb->bus, &error_fatal)) {
3fc5acde 2946 spapr->has_graphics = true;
c6e76503 2947 machine->usb |= defaults_enabled() && !machine->usb_disabled;
f28359d8
LZ
2948 }
2949
4ee9ced9 2950 if (machine->usb) {
57040d45
TH
2951 if (smc->use_ohci_by_default) {
2952 pci_create_simple(phb->bus, -1, "pci-ohci");
2953 } else {
2954 pci_create_simple(phb->bus, -1, "nec-usb-xhci");
2955 }
c86580b8 2956
35139a59 2957 if (spapr->has_graphics) {
c86580b8
MA
2958 USBBus *usb_bus = usb_bus_find(-1);
2959
2960 usb_create_simple(usb_bus, "usb-kbd");
2961 usb_create_simple(usb_bus, "usb-mouse");
35139a59
DG
2962 }
2963 }
2964
ab3dd749 2965 if (spapr->rma_size < (MIN_RMA_SLOF * MiB)) {
d54e4d76
DG
2966 error_report(
2967 "pSeries SLOF firmware requires >= %ldM guest RMA (Real Mode Area memory)",
2968 MIN_RMA_SLOF);
4d8d5467
BH
2969 exit(1);
2970 }
2971
9fdf0c29
DG
2972 if (kernel_filename) {
2973 uint64_t lowaddr = 0;
2974
4366e1db
LM
2975 spapr->kernel_size = load_elf(kernel_filename, NULL,
2976 translate_kernel_address, NULL,
2977 NULL, &lowaddr, NULL, 1,
a19f7fb0
DG
2978 PPC_ELF_MACHINE, 0, 0);
2979 if (spapr->kernel_size == ELF_LOAD_WRONG_ENDIAN) {
4366e1db 2980 spapr->kernel_size = load_elf(kernel_filename, NULL,
a19f7fb0
DG
2981 translate_kernel_address, NULL, NULL,
2982 &lowaddr, NULL, 0, PPC_ELF_MACHINE,
2983 0, 0);
2984 spapr->kernel_le = spapr->kernel_size > 0;
16457e7f 2985 }
a19f7fb0
DG
2986 if (spapr->kernel_size < 0) {
2987 error_report("error loading %s: %s", kernel_filename,
2988 load_elf_strerror(spapr->kernel_size));
9fdf0c29
DG
2989 exit(1);
2990 }
2991
2992 /* load initrd */
2993 if (initrd_filename) {
4d8d5467
BH
2994 /* Try to locate the initrd in the gap between the kernel
2995 * and the firmware. Add a bit of space just in case
2996 */
a19f7fb0
DG
2997 spapr->initrd_base = (KERNEL_LOAD_ADDR + spapr->kernel_size
2998 + 0x1ffff) & ~0xffff;
2999 spapr->initrd_size = load_image_targphys(initrd_filename,
3000 spapr->initrd_base,
3001 load_limit
3002 - spapr->initrd_base);
3003 if (spapr->initrd_size < 0) {
d54e4d76
DG
3004 error_report("could not load initial ram disk '%s'",
3005 initrd_filename);
9fdf0c29
DG
3006 exit(1);
3007 }
9fdf0c29 3008 }
4d8d5467 3009 }
a3467baa 3010
8e7ea787
AF
3011 if (bios_name == NULL) {
3012 bios_name = FW_FILE_NAME;
3013 }
3014 filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
4c56440d 3015 if (!filename) {
68fea5a0 3016 error_report("Could not find LPAR firmware '%s'", bios_name);
4c56440d
SW
3017 exit(1);
3018 }
4d8d5467 3019 fw_size = load_image_targphys(filename, 0, FW_MAX_SIZE);
68fea5a0
TH
3020 if (fw_size <= 0) {
3021 error_report("Could not load LPAR firmware '%s'", filename);
4d8d5467
BH
3022 exit(1);
3023 }
3024 g_free(filename);
4d8d5467 3025
28e02042
DG
3026 /* FIXME: Should register things through the MachineState's qdev
3027 * interface, this is a legacy from the sPAPREnvironment structure
3028 * which predated MachineState but had a similar function */
4be21d56
DG
3029 vmstate_register(NULL, 0, &vmstate_spapr, spapr);
3030 register_savevm_live(NULL, "spapr/htab", -1, 1,
3031 &savevm_htab_handlers, spapr);
3032
bb2bdd81
GK
3033 qbus_set_hotplug_handler(sysbus_get_default(), OBJECT(machine),
3034 &error_fatal);
3035
5b2128d2 3036 qemu_register_boot_set(spapr_boot_set, spapr);
42043e4f 3037
42043e4f 3038 if (kvm_enabled()) {
3dc410ae 3039 /* to stop and start vmclock */
42043e4f
LV
3040 qemu_add_vm_change_state_handler(cpu_ppc_clock_vm_state_change,
3041 &spapr->tb);
3dc410ae
AK
3042
3043 kvmppc_spapr_enable_inkernel_multitce();
42043e4f 3044 }
9fdf0c29
DG
3045}
3046
dc0ca80e 3047static int spapr_kvm_type(MachineState *machine, const char *vm_type)
135a129a
AK
3048{
3049 if (!vm_type) {
3050 return 0;
3051 }
3052
3053 if (!strcmp(vm_type, "HV")) {
3054 return 1;
3055 }
3056
3057 if (!strcmp(vm_type, "PR")) {
3058 return 2;
3059 }
3060
3061 error_report("Unknown kvm-type specified '%s'", vm_type);
3062 exit(1);
3063}
3064
71461b0f 3065/*
627b84f4 3066 * Implementation of an interface to adjust firmware path
71461b0f
AK
3067 * for the bootindex property handling.
3068 */
3069static char *spapr_get_fw_dev_path(FWPathProvider *p, BusState *bus,
3070 DeviceState *dev)
3071{
3072#define CAST(type, obj, name) \
3073 ((type *)object_dynamic_cast(OBJECT(obj), (name)))
3074 SCSIDevice *d = CAST(SCSIDevice, dev, TYPE_SCSI_DEVICE);
3075 sPAPRPHBState *phb = CAST(sPAPRPHBState, dev, TYPE_SPAPR_PCI_HOST_BRIDGE);
c4e13492 3076 VHostSCSICommon *vsc = CAST(VHostSCSICommon, dev, TYPE_VHOST_SCSI_COMMON);
71461b0f
AK
3077
3078 if (d) {
3079 void *spapr = CAST(void, bus->parent, "spapr-vscsi");
3080 VirtIOSCSI *virtio = CAST(VirtIOSCSI, bus->parent, TYPE_VIRTIO_SCSI);
3081 USBDevice *usb = CAST(USBDevice, bus->parent, TYPE_USB_DEVICE);
3082
3083 if (spapr) {
3084 /*
3085 * Replace "channel@0/disk@0,0" with "disk@8000000000000000":
1ac24c91
TH
3086 * In the top 16 bits of the 64-bit LUN, we use SRP luns of the form
3087 * 0x8000 | (target << 8) | (bus << 5) | lun
3088 * (see the "Logical unit addressing format" table in SAM5)
71461b0f 3089 */
1ac24c91 3090 unsigned id = 0x8000 | (d->id << 8) | (d->channel << 5) | d->lun;
71461b0f
AK
3091 return g_strdup_printf("%s@%"PRIX64, qdev_fw_name(dev),
3092 (uint64_t)id << 48);
3093 } else if (virtio) {
3094 /*
3095 * We use SRP luns of the form 01000000 | (target << 8) | lun
3096 * in the top 32 bits of the 64-bit LUN
3097 * Note: the quote above is from SLOF and it is wrong,
3098 * the actual binding is:
3099 * swap 0100 or 10 << or 20 << ( target lun-id -- srplun )
3100 */
3101 unsigned id = 0x1000000 | (d->id << 16) | d->lun;
bac658d1
TH
3102 if (d->lun >= 256) {
3103 /* Use the LUN "flat space addressing method" */
3104 id |= 0x4000;
3105 }
71461b0f
AK
3106 return g_strdup_printf("%s@%"PRIX64, qdev_fw_name(dev),
3107 (uint64_t)id << 32);
3108 } else if (usb) {
3109 /*
3110 * We use SRP luns of the form 01000000 | (usb-port << 16) | lun
3111 * in the top 32 bits of the 64-bit LUN
3112 */
3113 unsigned usb_port = atoi(usb->port->path);
3114 unsigned id = 0x1000000 | (usb_port << 16) | d->lun;
3115 return g_strdup_printf("%s@%"PRIX64, qdev_fw_name(dev),
3116 (uint64_t)id << 32);
3117 }
3118 }
3119
b99260eb
TH
3120 /*
3121 * SLOF probes the USB devices, and if it recognizes that the device is a
3122 * storage device, it changes its name to "storage" instead of "usb-host",
3123 * and additionally adds a child node for the SCSI LUN, so the correct
3124 * boot path in SLOF is something like .../storage@1/disk@xxx" instead.
3125 */
3126 if (strcmp("usb-host", qdev_fw_name(dev)) == 0) {
3127 USBDevice *usbdev = CAST(USBDevice, dev, TYPE_USB_DEVICE);
3128 if (usb_host_dev_is_scsi_storage(usbdev)) {
3129 return g_strdup_printf("storage@%s/disk", usbdev->port->path);
3130 }
3131 }
3132
71461b0f
AK
3133 if (phb) {
3134 /* Replace "pci" with "pci@800000020000000" */
3135 return g_strdup_printf("pci@%"PRIX64, phb->buid);
3136 }
3137
c4e13492
FF
3138 if (vsc) {
3139 /* Same logic as virtio above */
3140 unsigned id = 0x1000000 | (vsc->target << 16) | vsc->lun;
3141 return g_strdup_printf("disk@%"PRIX64, (uint64_t)id << 32);
3142 }
3143
4871dd4c
TH
3144 if (g_str_equal("pci-bridge", qdev_fw_name(dev))) {
3145 /* SLOF uses "pci" instead of "pci-bridge" for PCI bridges */
3146 PCIDevice *pcidev = CAST(PCIDevice, dev, TYPE_PCI_DEVICE);
3147 return g_strdup_printf("pci@%x", PCI_SLOT(pcidev->devfn));
3148 }
3149
71461b0f
AK
3150 return NULL;
3151}
3152
23825581
EH
3153static char *spapr_get_kvm_type(Object *obj, Error **errp)
3154{
28e02042 3155 sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
23825581 3156
28e02042 3157 return g_strdup(spapr->kvm_type);
23825581
EH
3158}
3159
3160static void spapr_set_kvm_type(Object *obj, const char *value, Error **errp)
3161{
28e02042 3162 sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
23825581 3163
28e02042
DG
3164 g_free(spapr->kvm_type);
3165 spapr->kvm_type = g_strdup(value);
23825581
EH
3166}
3167
f6229214
MR
3168static bool spapr_get_modern_hotplug_events(Object *obj, Error **errp)
3169{
3170 sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
3171
3172 return spapr->use_hotplug_event_source;
3173}
3174
3175static void spapr_set_modern_hotplug_events(Object *obj, bool value,
3176 Error **errp)
3177{
3178 sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
3179
3180 spapr->use_hotplug_event_source = value;
3181}
3182
fcad0d21
AK
3183static bool spapr_get_msix_emulation(Object *obj, Error **errp)
3184{
3185 return true;
3186}
3187
30f4b05b
DG
3188static char *spapr_get_resize_hpt(Object *obj, Error **errp)
3189{
3190 sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
3191
3192 switch (spapr->resize_hpt) {
3193 case SPAPR_RESIZE_HPT_DEFAULT:
3194 return g_strdup("default");
3195 case SPAPR_RESIZE_HPT_DISABLED:
3196 return g_strdup("disabled");
3197 case SPAPR_RESIZE_HPT_ENABLED:
3198 return g_strdup("enabled");
3199 case SPAPR_RESIZE_HPT_REQUIRED:
3200 return g_strdup("required");
3201 }
3202 g_assert_not_reached();
3203}
3204
3205static void spapr_set_resize_hpt(Object *obj, const char *value, Error **errp)
3206{
3207 sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
3208
3209 if (strcmp(value, "default") == 0) {
3210 spapr->resize_hpt = SPAPR_RESIZE_HPT_DEFAULT;
3211 } else if (strcmp(value, "disabled") == 0) {
3212 spapr->resize_hpt = SPAPR_RESIZE_HPT_DISABLED;
3213 } else if (strcmp(value, "enabled") == 0) {
3214 spapr->resize_hpt = SPAPR_RESIZE_HPT_ENABLED;
3215 } else if (strcmp(value, "required") == 0) {
3216 spapr->resize_hpt = SPAPR_RESIZE_HPT_REQUIRED;
3217 } else {
3218 error_setg(errp, "Bad value for \"resize-hpt\" property");
3219 }
3220}
3221
fa98fbfc
SB
3222static void spapr_get_vsmt(Object *obj, Visitor *v, const char *name,
3223 void *opaque, Error **errp)
3224{
3225 visit_type_uint32(v, name, (uint32_t *)opaque, errp);
3226}
3227
3228static void spapr_set_vsmt(Object *obj, Visitor *v, const char *name,
3229 void *opaque, Error **errp)
3230{
3231 visit_type_uint32(v, name, (uint32_t *)opaque, errp);
3232}
3233
3ba3d0bc
CLG
3234static char *spapr_get_ic_mode(Object *obj, Error **errp)
3235{
3236 sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
3237
3238 if (spapr->irq == &spapr_irq_xics_legacy) {
3239 return g_strdup("legacy");
3240 } else if (spapr->irq == &spapr_irq_xics) {
3241 return g_strdup("xics");
3242 } else if (spapr->irq == &spapr_irq_xive) {
3243 return g_strdup("xive");
13db0cd9
CLG
3244 } else if (spapr->irq == &spapr_irq_dual) {
3245 return g_strdup("dual");
3ba3d0bc
CLG
3246 }
3247 g_assert_not_reached();
3248}
3249
3250static void spapr_set_ic_mode(Object *obj, const char *value, Error **errp)
3251{
3252 sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
3253
21df5e4f
GK
3254 if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
3255 error_setg(errp, "This machine only uses the legacy XICS backend, don't pass ic-mode");
3256 return;
3257 }
3258
3ba3d0bc
CLG
3259 /* The legacy IRQ backend can not be set */
3260 if (strcmp(value, "xics") == 0) {
3261 spapr->irq = &spapr_irq_xics;
3262 } else if (strcmp(value, "xive") == 0) {
3263 spapr->irq = &spapr_irq_xive;
13db0cd9
CLG
3264 } else if (strcmp(value, "dual") == 0) {
3265 spapr->irq = &spapr_irq_dual;
3ba3d0bc
CLG
3266 } else {
3267 error_setg(errp, "Bad value for \"ic-mode\" property");
3268 }
3269}
3270
27461d69
PP
3271static char *spapr_get_host_model(Object *obj, Error **errp)
3272{
3273 sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
3274
3275 return g_strdup(spapr->host_model);
3276}
3277
3278static void spapr_set_host_model(Object *obj, const char *value, Error **errp)
3279{
3280 sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
3281
3282 g_free(spapr->host_model);
3283 spapr->host_model = g_strdup(value);
3284}
3285
3286static char *spapr_get_host_serial(Object *obj, Error **errp)
3287{
3288 sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
3289
3290 return g_strdup(spapr->host_serial);
3291}
3292
3293static void spapr_set_host_serial(Object *obj, const char *value, Error **errp)
3294{
3295 sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
3296
3297 g_free(spapr->host_serial);
3298 spapr->host_serial = g_strdup(value);
3299}
3300
bcb5ce08 3301static void spapr_instance_init(Object *obj)
23825581 3302{
715c5407 3303 sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
3ba3d0bc 3304 sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
715c5407
DG
3305
3306 spapr->htab_fd = -1;
f6229214 3307 spapr->use_hotplug_event_source = true;
23825581
EH
3308 object_property_add_str(obj, "kvm-type",
3309 spapr_get_kvm_type, spapr_set_kvm_type, NULL);
49d2e648
MA
3310 object_property_set_description(obj, "kvm-type",
3311 "Specifies the KVM virtualization mode (HV, PR)",
3312 NULL);
f6229214
MR
3313 object_property_add_bool(obj, "modern-hotplug-events",
3314 spapr_get_modern_hotplug_events,
3315 spapr_set_modern_hotplug_events,
3316 NULL);
3317 object_property_set_description(obj, "modern-hotplug-events",
3318 "Use dedicated hotplug event mechanism in"
3319 " place of standard EPOW events when possible"
3320 " (required for memory hot-unplug support)",
3321 NULL);
7843c0d6
DG
3322 ppc_compat_add_property(obj, "max-cpu-compat", &spapr->max_compat_pvr,
3323 "Maximum permitted CPU compatibility mode",
3324 &error_fatal);
30f4b05b
DG
3325
3326 object_property_add_str(obj, "resize-hpt",
3327 spapr_get_resize_hpt, spapr_set_resize_hpt, NULL);
3328 object_property_set_description(obj, "resize-hpt",
3329 "Resizing of the Hash Page Table (enabled, disabled, required)",
3330 NULL);
fa98fbfc
SB
3331 object_property_add(obj, "vsmt", "uint32", spapr_get_vsmt,
3332 spapr_set_vsmt, NULL, &spapr->vsmt, &error_abort);
3333 object_property_set_description(obj, "vsmt",
3334 "Virtual SMT: KVM behaves as if this were"
3335 " the host's SMT mode", &error_abort);
fcad0d21
AK
3336 object_property_add_bool(obj, "vfio-no-msix-emulation",
3337 spapr_get_msix_emulation, NULL, NULL);
3ba3d0bc
CLG
3338
3339 /* The machine class defines the default interrupt controller mode */
3340 spapr->irq = smc->irq;
3341 object_property_add_str(obj, "ic-mode", spapr_get_ic_mode,
3342 spapr_set_ic_mode, NULL);
3343 object_property_set_description(obj, "ic-mode",
13db0cd9 3344 "Specifies the interrupt controller mode (xics, xive, dual)",
3ba3d0bc 3345 NULL);
27461d69
PP
3346
3347 object_property_add_str(obj, "host-model",
3348 spapr_get_host_model, spapr_set_host_model,
3349 &error_abort);
3350 object_property_set_description(obj, "host-model",
3351 "Set host's model-id to use - none|passthrough|string", &error_abort);
3352 object_property_add_str(obj, "host-serial",
3353 spapr_get_host_serial, spapr_set_host_serial,
3354 &error_abort);
3355 object_property_set_description(obj, "host-serial",
3356 "Set host's system-id to use - none|passthrough|string", &error_abort);
23825581
EH
3357}
3358
87bbdd9c
DG
3359static void spapr_machine_finalizefn(Object *obj)
3360{
3361 sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
3362
3363 g_free(spapr->kvm_type);
3364}
3365
1c7ad77e 3366void spapr_do_system_reset_on_cpu(CPUState *cs, run_on_cpu_data arg)
34316482 3367{
34316482
AK
3368 cpu_synchronize_state(cs);
3369 ppc_cpu_do_system_reset(cs);
3370}
3371
3372static void spapr_nmi(NMIState *n, int cpu_index, Error **errp)
3373{
3374 CPUState *cs;
3375
3376 CPU_FOREACH(cs) {
1c7ad77e 3377 async_run_on_cpu(cs, spapr_do_system_reset_on_cpu, RUN_ON_CPU_NULL);
34316482
AK
3378 }
3379}
3380
62d38c9b
GK
3381int spapr_lmb_dt_populate(sPAPRDRConnector *drc, sPAPRMachineState *spapr,
3382 void *fdt, int *fdt_start_offset, Error **errp)
3383{
3384 uint64_t addr;
3385 uint32_t node;
3386
3387 addr = spapr_drc_index(drc) * SPAPR_MEMORY_BLOCK_SIZE;
3388 node = object_property_get_uint(OBJECT(drc->dev), PC_DIMM_NODE_PROP,
3389 &error_abort);
3390 *fdt_start_offset = spapr_populate_memory_node(fdt, node, addr,
3391 SPAPR_MEMORY_BLOCK_SIZE);
3392 return 0;
3393}
3394
79b78a6b 3395static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size,
62d38c9b 3396 bool dedicated_hp_event_source, Error **errp)
c20d332a
BR
3397{
3398 sPAPRDRConnector *drc;
c20d332a 3399 uint32_t nr_lmbs = size/SPAPR_MEMORY_BLOCK_SIZE;
62d38c9b 3400 int i;
79b78a6b 3401 uint64_t addr = addr_start;
94fd9cba 3402 bool hotplugged = spapr_drc_hotplugged(dev);
160bb678 3403 Error *local_err = NULL;
c20d332a 3404
c20d332a 3405 for (i = 0; i < nr_lmbs; i++) {
fbf55397
DG
3406 drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB,
3407 addr / SPAPR_MEMORY_BLOCK_SIZE);
c20d332a
BR
3408 g_assert(drc);
3409
09d876ce 3410 spapr_drc_attach(drc, dev, &local_err);
160bb678
GK
3411 if (local_err) {
3412 while (addr > addr_start) {
3413 addr -= SPAPR_MEMORY_BLOCK_SIZE;
3414 drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB,
3415 addr / SPAPR_MEMORY_BLOCK_SIZE);
a8dc47fd 3416 spapr_drc_detach(drc);
160bb678 3417 }
160bb678
GK
3418 error_propagate(errp, local_err);
3419 return;
3420 }
94fd9cba
LV
3421 if (!hotplugged) {
3422 spapr_drc_reset(drc);
3423 }
c20d332a
BR
3424 addr += SPAPR_MEMORY_BLOCK_SIZE;
3425 }
5dd5238c
JD
3426 /* send hotplug notification to the
3427 * guest only in case of hotplugged memory
3428 */
94fd9cba 3429 if (hotplugged) {
79b78a6b 3430 if (dedicated_hp_event_source) {
fbf55397
DG
3431 drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB,
3432 addr_start / SPAPR_MEMORY_BLOCK_SIZE);
79b78a6b
MR
3433 spapr_hotplug_req_add_by_count_indexed(SPAPR_DR_CONNECTOR_TYPE_LMB,
3434 nr_lmbs,
0b55aa91 3435 spapr_drc_index(drc));
79b78a6b
MR
3436 } else {
3437 spapr_hotplug_req_add_by_count(SPAPR_DR_CONNECTOR_TYPE_LMB,
3438 nr_lmbs);
3439 }
5dd5238c 3440 }
c20d332a
BR
3441}
3442
3443static void spapr_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
81985f3b 3444 Error **errp)
c20d332a
BR
3445{
3446 Error *local_err = NULL;
3447 sPAPRMachineState *ms = SPAPR_MACHINE(hotplug_dev);
3448 PCDIMMDevice *dimm = PC_DIMM(dev);
b0e62443 3449 uint64_t size, addr;
04790978 3450
946d6154 3451 size = memory_device_get_region_size(MEMORY_DEVICE(dev), &error_abort);
df587133 3452
fd3416f5 3453 pc_dimm_plug(dimm, MACHINE(ms), &local_err);
c20d332a
BR
3454 if (local_err) {
3455 goto out;
3456 }
3457
9ed442b8
MAL
3458 addr = object_property_get_uint(OBJECT(dimm),
3459 PC_DIMM_ADDR_PROP, &local_err);
c20d332a 3460 if (local_err) {
160bb678 3461 goto out_unplug;
c20d332a
BR
3462 }
3463
62d38c9b 3464 spapr_add_lmbs(dev, addr, size, spapr_ovec_test(ms->ov5_cas, OV5_HP_EVT),
160bb678
GK
3465 &local_err);
3466 if (local_err) {
3467 goto out_unplug;
3468 }
3469
3470 return;
c20d332a 3471
160bb678 3472out_unplug:
fd3416f5 3473 pc_dimm_unplug(dimm, MACHINE(ms));
c20d332a
BR
3474out:
3475 error_propagate(errp, local_err);
3476}
3477
c871bc70
LV
3478static void spapr_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
3479 Error **errp)
3480{
4e8a01bd 3481 const sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(hotplug_dev);
123eec65 3482 sPAPRMachineState *spapr = SPAPR_MACHINE(hotplug_dev);
c871bc70 3483 PCDIMMDevice *dimm = PC_DIMM(dev);
8f1ffe5b 3484 Error *local_err = NULL;
04790978 3485 uint64_t size;
123eec65
DG
3486 Object *memdev;
3487 hwaddr pagesize;
c871bc70 3488
4e8a01bd
DH
3489 if (!smc->dr_lmb_enabled) {
3490 error_setg(errp, "Memory hotplug not supported for this machine");
3491 return;
3492 }
3493
946d6154
DH
3494 size = memory_device_get_region_size(MEMORY_DEVICE(dimm), &local_err);
3495 if (local_err) {
3496 error_propagate(errp, local_err);
04790978
TH
3497 return;
3498 }
04790978 3499
c871bc70
LV
3500 if (size % SPAPR_MEMORY_BLOCK_SIZE) {
3501 error_setg(errp, "Hotplugged memory size must be a multiple of "
ab3dd749 3502 "%" PRIu64 " MB", SPAPR_MEMORY_BLOCK_SIZE / MiB);
c871bc70
LV
3503 return;
3504 }
3505
123eec65
DG
3506 memdev = object_property_get_link(OBJECT(dimm), PC_DIMM_MEMDEV_PROP,
3507 &error_abort);
3508 pagesize = host_memory_backend_pagesize(MEMORY_BACKEND(memdev));
8f1ffe5b
DH
3509 spapr_check_pagesize(spapr, pagesize, &local_err);
3510 if (local_err) {
3511 error_propagate(errp, local_err);
3512 return;
3513 }
3514
fd3416f5 3515 pc_dimm_pre_plug(dimm, MACHINE(hotplug_dev), NULL, errp);
c871bc70
LV
3516}
3517
0cffce56
DG
3518struct sPAPRDIMMState {
3519 PCDIMMDevice *dimm;
cf632463 3520 uint32_t nr_lmbs;
0cffce56
DG
3521 QTAILQ_ENTRY(sPAPRDIMMState) next;
3522};
3523
3524static sPAPRDIMMState *spapr_pending_dimm_unplugs_find(sPAPRMachineState *s,
3525 PCDIMMDevice *dimm)
3526{
3527 sPAPRDIMMState *dimm_state = NULL;
3528
3529 QTAILQ_FOREACH(dimm_state, &s->pending_dimm_unplugs, next) {
3530 if (dimm_state->dimm == dimm) {
3531 break;
3532 }
3533 }
3534 return dimm_state;
3535}
3536
8d5981c4
BR
3537static sPAPRDIMMState *spapr_pending_dimm_unplugs_add(sPAPRMachineState *spapr,
3538 uint32_t nr_lmbs,
3539 PCDIMMDevice *dimm)
0cffce56 3540{
8d5981c4
BR
3541 sPAPRDIMMState *ds = NULL;
3542
3543 /*
3544 * If this request is for a DIMM whose removal had failed earlier
3545 * (due to guest's refusal to remove the LMBs), we would have this
3546 * dimm already in the pending_dimm_unplugs list. In that
3547 * case don't add again.
3548 */
3549 ds = spapr_pending_dimm_unplugs_find(spapr, dimm);
3550 if (!ds) {
3551 ds = g_malloc0(sizeof(sPAPRDIMMState));
3552 ds->nr_lmbs = nr_lmbs;
3553 ds->dimm = dimm;
3554 QTAILQ_INSERT_HEAD(&spapr->pending_dimm_unplugs, ds, next);
3555 }
3556 return ds;
0cffce56
DG
3557}
3558
3559static void spapr_pending_dimm_unplugs_remove(sPAPRMachineState *spapr,
3560 sPAPRDIMMState *dimm_state)
3561{
3562 QTAILQ_REMOVE(&spapr->pending_dimm_unplugs, dimm_state, next);
3563 g_free(dimm_state);
3564}
cf632463 3565
16ee9980
DHB
3566static sPAPRDIMMState *spapr_recover_pending_dimm_state(sPAPRMachineState *ms,
3567 PCDIMMDevice *dimm)
3568{
3569 sPAPRDRConnector *drc;
946d6154
DH
3570 uint64_t size = memory_device_get_region_size(MEMORY_DEVICE(dimm),
3571 &error_abort);
16ee9980
DHB
3572 uint32_t nr_lmbs = size / SPAPR_MEMORY_BLOCK_SIZE;
3573 uint32_t avail_lmbs = 0;
3574 uint64_t addr_start, addr;
3575 int i;
16ee9980
DHB
3576
3577 addr_start = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP,
3578 &error_abort);
3579
3580 addr = addr_start;
3581 for (i = 0; i < nr_lmbs; i++) {
fbf55397
DG
3582 drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB,
3583 addr / SPAPR_MEMORY_BLOCK_SIZE);
16ee9980 3584 g_assert(drc);
454b580a 3585 if (drc->dev) {
16ee9980
DHB
3586 avail_lmbs++;
3587 }
3588 addr += SPAPR_MEMORY_BLOCK_SIZE;
3589 }
3590
8d5981c4 3591 return spapr_pending_dimm_unplugs_add(ms, avail_lmbs, dimm);
16ee9980
DHB
3592}
3593
31834723
DHB
3594/* Callback to be called during DRC release. */
3595void spapr_lmb_release(DeviceState *dev)
cf632463 3596{
3ec71474
DH
3597 HotplugHandler *hotplug_ctrl = qdev_get_hotplug_handler(dev);
3598 sPAPRMachineState *spapr = SPAPR_MACHINE(hotplug_ctrl);
0cffce56 3599 sPAPRDIMMState *ds = spapr_pending_dimm_unplugs_find(spapr, PC_DIMM(dev));
cf632463 3600
16ee9980
DHB
3601 /* This information will get lost if a migration occurs
3602 * during the unplug process. In this case recover it. */
3603 if (ds == NULL) {
3604 ds = spapr_recover_pending_dimm_state(spapr, PC_DIMM(dev));
8d5981c4 3605 g_assert(ds);
454b580a
DG
3606 /* The DRC being examined by the caller at least must be counted */
3607 g_assert(ds->nr_lmbs);
3608 }
3609
3610 if (--ds->nr_lmbs) {
cf632463
BR
3611 return;
3612 }
3613
cf632463
BR
3614 /*
3615 * Now that all the LMBs have been removed by the guest, call the
3ec71474 3616 * unplug handler chain. This can never fail.
cf632463 3617 */
3ec71474 3618 hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort);
07578b0a 3619 object_unparent(OBJECT(dev));
3ec71474
DH
3620}
3621
3622static void spapr_memory_unplug(HotplugHandler *hotplug_dev, DeviceState *dev)
3623{
3624 sPAPRMachineState *spapr = SPAPR_MACHINE(hotplug_dev);
3625 sPAPRDIMMState *ds = spapr_pending_dimm_unplugs_find(spapr, PC_DIMM(dev));
3626
fd3416f5 3627 pc_dimm_unplug(PC_DIMM(dev), MACHINE(hotplug_dev));
07578b0a 3628 object_property_set_bool(OBJECT(dev), false, "realized", NULL);
2a129767 3629 spapr_pending_dimm_unplugs_remove(spapr, ds);
cf632463
BR
3630}
3631
3632static void spapr_memory_unplug_request(HotplugHandler *hotplug_dev,
3633 DeviceState *dev, Error **errp)
3634{
0cffce56 3635 sPAPRMachineState *spapr = SPAPR_MACHINE(hotplug_dev);
cf632463
BR
3636 Error *local_err = NULL;
3637 PCDIMMDevice *dimm = PC_DIMM(dev);
04790978
TH
3638 uint32_t nr_lmbs;
3639 uint64_t size, addr_start, addr;
0cffce56
DG
3640 int i;
3641 sPAPRDRConnector *drc;
04790978 3642
946d6154 3643 size = memory_device_get_region_size(MEMORY_DEVICE(dimm), &error_abort);
04790978
TH
3644 nr_lmbs = size / SPAPR_MEMORY_BLOCK_SIZE;
3645
9ed442b8 3646 addr_start = object_property_get_uint(OBJECT(dimm), PC_DIMM_ADDR_PROP,
0cffce56 3647 &local_err);
cf632463
BR
3648 if (local_err) {
3649 goto out;
3650 }
3651
2a129767
DHB
3652 /*
3653 * An existing pending dimm state for this DIMM means that there is an
3654 * unplug operation in progress, waiting for the spapr_lmb_release
3655 * callback to complete the job (BQL can't cover that far). In this case,
3656 * bail out to avoid detaching DRCs that were already released.
3657 */
3658 if (spapr_pending_dimm_unplugs_find(spapr, dimm)) {
3659 error_setg(&local_err,
3660 "Memory unplug already in progress for device %s",
3661 dev->id);
3662 goto out;
3663 }
3664
8d5981c4 3665 spapr_pending_dimm_unplugs_add(spapr, nr_lmbs, dimm);
0cffce56
DG
3666
3667 addr = addr_start;
3668 for (i = 0; i < nr_lmbs; i++) {
fbf55397
DG
3669 drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB,
3670 addr / SPAPR_MEMORY_BLOCK_SIZE);
0cffce56
DG
3671 g_assert(drc);
3672
a8dc47fd 3673 spapr_drc_detach(drc);
0cffce56
DG
3674 addr += SPAPR_MEMORY_BLOCK_SIZE;
3675 }
3676
fbf55397
DG
3677 drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB,
3678 addr_start / SPAPR_MEMORY_BLOCK_SIZE);
0cffce56 3679 spapr_hotplug_req_remove_by_count_indexed(SPAPR_DR_CONNECTOR_TYPE_LMB,
0b55aa91 3680 nr_lmbs, spapr_drc_index(drc));
cf632463
BR
3681out:
3682 error_propagate(errp, local_err);
3683}
3684
765d1bdd
DG
3685/* Callback to be called during DRC release. */
3686void spapr_core_release(DeviceState *dev)
ff9006dd 3687{
a4261be1
DH
3688 HotplugHandler *hotplug_ctrl = qdev_get_hotplug_handler(dev);
3689
3690 /* Call the unplug handler chain. This can never fail. */
3691 hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort);
07578b0a 3692 object_unparent(OBJECT(dev));
a4261be1
DH
3693}
3694
3695static void spapr_core_unplug(HotplugHandler *hotplug_dev, DeviceState *dev)
3696{
3697 MachineState *ms = MACHINE(hotplug_dev);
46f7afa3 3698 sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(ms);
ff9006dd 3699 CPUCore *cc = CPU_CORE(dev);
535455fd 3700 CPUArchId *core_slot = spapr_find_cpu_slot(ms, cc->core_id, NULL);
ff9006dd 3701
46f7afa3
GK
3702 if (smc->pre_2_10_has_unused_icps) {
3703 sPAPRCPUCore *sc = SPAPR_CPU_CORE(OBJECT(dev));
46f7afa3
GK
3704 int i;
3705
3706 for (i = 0; i < cc->nr_threads; i++) {
94ad93bd 3707 CPUState *cs = CPU(sc->threads[i]);
46f7afa3
GK
3708
3709 pre_2_10_vmstate_register_dummy_icp(cs->cpu_index);
3710 }
3711 }
3712
07572c06 3713 assert(core_slot);
535455fd 3714 core_slot->cpu = NULL;
07578b0a 3715 object_property_set_bool(OBJECT(dev), false, "realized", NULL);
ff9006dd
IM
3716}
3717
115debf2
IM
3718static
3719void spapr_core_unplug_request(HotplugHandler *hotplug_dev, DeviceState *dev,
3720 Error **errp)
ff9006dd 3721{
72194664 3722 sPAPRMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev));
535455fd
IM
3723 int index;
3724 sPAPRDRConnector *drc;
535455fd 3725 CPUCore *cc = CPU_CORE(dev);
ff9006dd 3726
535455fd
IM
3727 if (!spapr_find_cpu_slot(MACHINE(hotplug_dev), cc->core_id, &index)) {
3728 error_setg(errp, "Unable to find CPU core with core-id: %d",
3729 cc->core_id);
3730 return;
3731 }
ff9006dd
IM
3732 if (index == 0) {
3733 error_setg(errp, "Boot CPU core may not be unplugged");
3734 return;
3735 }
3736
5d0fb150
GK
3737 drc = spapr_drc_by_id(TYPE_SPAPR_DRC_CPU,
3738 spapr_vcpu_id(spapr, cc->core_id));
ff9006dd
IM
3739 g_assert(drc);
3740
a8dc47fd 3741 spapr_drc_detach(drc);
ff9006dd
IM
3742
3743 spapr_hotplug_req_remove_by_index(drc);
3744}
3745
/*
 * Build the flattened-device-tree node for a hotplugged CPU core.
 * The node is created at the FDT root, named "<fw_name>@<vcpu-id>",
 * and its offset is returned through @fdt_start_offset.
 * Returns 0 (errors from fdt_add_subnode are not checked here —
 * NOTE(review): offset could be negative on FDT overflow; confirm
 * callers guarantee sufficient FDT space).
 */
int spapr_core_dt_populate(sPAPRDRConnector *drc, sPAPRMachineState *spapr,
                           void *fdt, int *fdt_start_offset, Error **errp)
{
    sPAPRCPUCore *core = SPAPR_CPU_CORE(drc->dev);
    CPUState *cs = CPU(core->threads[0]);       /* first thread represents the core */
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    DeviceClass *dc = DEVICE_GET_CLASS(cs);
    int id = spapr_get_vcpu_id(cpu);
    char *nodename;
    int offset;

    nodename = g_strdup_printf("%s@%x", dc->fw_name, id);
    offset = fdt_add_subnode(fdt, 0, nodename);
    g_free(nodename);

    spapr_populate_cpu_dt(cs, fdt, offset, spapr);

    *fdt_start_offset = offset;
    return 0;
}
3766
/*
 * Plug (cold or hot) a CPU core: attach it to its DR connector,
 * notify the guest if this is a runtime hotplug, and record the core
 * in the machine's possible-CPU slot table.
 */
static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
                            Error **errp)
{
    sPAPRMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev));
    MachineClass *mc = MACHINE_GET_CLASS(spapr);
    sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
    sPAPRCPUCore *core = SPAPR_CPU_CORE(OBJECT(dev));
    CPUCore *cc = CPU_CORE(dev);
    CPUState *cs;
    sPAPRDRConnector *drc;
    Error *local_err = NULL;
    CPUArchId *core_slot;
    int index;
    bool hotplugged = spapr_drc_hotplugged(dev);

    core_slot = spapr_find_cpu_slot(MACHINE(hotplug_dev), cc->core_id, &index);
    if (!core_slot) {
        error_setg(errp, "Unable to find CPU core with core-id: %d",
                   cc->core_id);
        return;
    }
    drc = spapr_drc_by_id(TYPE_SPAPR_DRC_CPU,
                          spapr_vcpu_id(spapr, cc->core_id));

    /* A missing DRC is only legal on machines without CPU hotplug. */
    g_assert(drc || !mc->has_hotpluggable_cpus);

    if (drc) {
        spapr_drc_attach(drc, dev, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }

        if (hotplugged) {
            /*
             * Send hotplug notification interrupt to the guest only
             * in case of hotplugged CPUs.
             */
            spapr_hotplug_req_add_by_index(drc);
        } else {
            /* Cold-plugged: just bring the DRC to its reset state. */
            spapr_drc_reset(drc);
        }
    }

    core_slot->cpu = OBJECT(dev);

    if (smc->pre_2_10_has_unused_icps) {
        int i;

        /* Drop the placeholder ICP vmstate entries now that real
         * per-thread ICPs exist (pre-2.10 migration compatibility). */
        for (i = 0; i < cc->nr_threads; i++) {
            cs = CPU(core->threads[i]);
            pre_2_10_vmstate_unregister_dummy_icp(cs->cpu_index);
        }
    }
}
3822
/*
 * Validate a CPU core before it is realized: machine supports hotplug,
 * core type matches the machine's CPU type, core-id is aligned and in
 * range, the target slot is free, and NUMA properties are consistent.
 */
static void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
                                Error **errp)
{
    MachineState *machine = MACHINE(OBJECT(hotplug_dev));
    MachineClass *mc = MACHINE_GET_CLASS(hotplug_dev);
    Error *local_err = NULL;
    CPUCore *cc = CPU_CORE(dev);
    const char *base_core_type = spapr_get_cpu_core_type(machine->cpu_type);
    const char *type = object_get_typename(OBJECT(dev));
    CPUArchId *core_slot;
    int index;

    if (dev->hotplugged && !mc->has_hotpluggable_cpus) {
        error_setg(&local_err, "CPU hotplug not supported for this machine");
        goto out;
    }

    if (strcmp(base_core_type, type)) {
        error_setg(&local_err, "CPU core type should be %s", base_core_type);
        goto out;
    }

    /* core-id must be a multiple of threads-per-core. */
    if (cc->core_id % smp_threads) {
        error_setg(&local_err, "invalid core id %d", cc->core_id);
        goto out;
    }

    /*
     * In general we should have homogeneous threads-per-core, but old
     * (pre hotplug support) machine types allow the last core to have
     * reduced threads as a compatibility hack for when we allowed
     * total vcpus not a multiple of threads-per-core.
     */
    if (mc->has_hotpluggable_cpus && (cc->nr_threads != smp_threads)) {
        error_setg(&local_err, "invalid nr-threads %d, must be %d",
                   cc->nr_threads, smp_threads);
        goto out;
    }

    core_slot = spapr_find_cpu_slot(MACHINE(hotplug_dev), cc->core_id, &index);
    if (!core_slot) {
        error_setg(&local_err, "core id %d out of range", cc->core_id);
        goto out;
    }

    if (core_slot->cpu) {
        error_setg(&local_err, "core %d already populated", cc->core_id);
        goto out;
    }

    /* Validate/assign the core's NUMA node against the slot's props. */
    numa_cpu_pre_plug(core_slot, dev, &local_err);

out:
    error_propagate(errp, local_err);
}
3878
/*
 * Build the FDT node for a hotplugged PCI host bridge (PHB).
 * Resolves the interrupt-controller phandle from the machine's FDT
 * blob, then generates the full PCI subtree.  Returns 0 on success,
 * -1 with @errp set on failure.
 */
int spapr_phb_dt_populate(sPAPRDRConnector *drc, sPAPRMachineState *spapr,
                          void *fdt, int *fdt_start_offset, Error **errp)
{
    sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(drc->dev);
    int intc_phandle;

    intc_phandle = spapr_irq_get_phandle(spapr, spapr->fdt_blob, errp);
    if (intc_phandle <= 0) {
        return -1;
    }

    if (spapr_populate_pci_dt(sphb, intc_phandle, fdt, spapr->irq->nr_msis,
                              fdt_start_offset)) {
        error_setg(errp, "unable to create FDT node for PHB %d", sphb->index);
        return -1;
    }

    /* generally SLOF creates these, for hotplug it's up to QEMU */
    _FDT(fdt_setprop_string(fdt, *fdt_start_offset, "name", "pci"));

    return 0;
}
3901
/*
 * Validate a PHB before realize: hotplug must be supported by the
 * machine type, "index" must be set, and the placement callback both
 * range-checks the index and computes the PHB's BUID/window addresses.
 */
static void spapr_phb_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
                               Error **errp)
{
    sPAPRMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev));
    sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(dev);
    sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
    const unsigned windows_supported = spapr_phb_windows_supported(sphb);

    if (dev->hotplugged && !smc->dr_phb_enabled) {
        error_setg(errp, "PHB hotplug not supported for this machine");
        return;
    }

    /* (uint32_t)-1 is the "unset" sentinel for the index property. */
    if (sphb->index == (uint32_t)-1) {
        error_setg(errp, "\"index\" for PAPR PHB is mandatory");
        return;
    }

    /*
     * This will check that sphb->index doesn't exceed the maximum number of
     * PHBs for the current machine type.
     */
    smc->phb_placement(spapr, sphb->index,
                       &sphb->buid, &sphb->io_win_addr,
                       &sphb->mem_win_addr, &sphb->mem64_win_addr,
                       windows_supported, sphb->dma_liobn, errp);
}
3929
/*
 * Plug (cold or hot) a PCI host bridge: attach it to its DR connector
 * and either notify the guest (hotplug) or reset the DRC (coldplug).
 * No-op on machine types without dynamic PHB support.
 */
static void spapr_phb_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
                           Error **errp)
{
    sPAPRMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev));
    sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
    sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(dev);
    sPAPRDRConnector *drc;
    bool hotplugged = spapr_drc_hotplugged(dev);
    Error *local_err = NULL;

    if (!smc->dr_phb_enabled) {
        return;
    }

    drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PHB, sphb->index);
    /* hotplug hooks should check it's enabled before getting this far */
    assert(drc);

    spapr_drc_attach(drc, DEVICE(dev), &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    if (hotplugged) {
        spapr_hotplug_req_add_by_index(drc);
    } else {
        spapr_drc_reset(drc);
    }
}
3960
/*
 * DRC release callback for a PHB: performs the actual unplug through
 * the machine's hotplug handler, then drops the device from the QOM
 * composition tree.
 */
void spapr_phb_release(DeviceState *dev)
{
    HotplugHandler *hotplug_ctrl = qdev_get_hotplug_handler(dev);

    hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort);
    object_unparent(OBJECT(dev));
}
3968
/* Finalize PHB removal by unrealizing the device. */
static void spapr_phb_unplug(HotplugHandler *hotplug_dev, DeviceState *dev)
{
    object_property_set_bool(OBJECT(dev), false, "realized", NULL);
}
3973
/*
 * Guest-visible unplug request for a PHB.  The spapr_drc_unplug_requested()
 * check makes repeated requests idempotent: only the first one detaches
 * the DRC and queues the remove event.
 */
static void spapr_phb_unplug_request(HotplugHandler *hotplug_dev,
                                     DeviceState *dev, Error **errp)
{
    sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(dev);
    sPAPRDRConnector *drc;

    drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PHB, sphb->index);
    assert(drc);

    if (!spapr_drc_unplug_requested(drc)) {
        spapr_drc_detach(drc);
        spapr_hotplug_req_remove_by_index(drc);
    }
}
3988
/*
 * Machine-level HotplugHandler .plug hook: dispatch to the per-device
 * plug routine based on the device's QOM type.
 */
static void spapr_machine_device_plug(HotplugHandler *hotplug_dev,
                                      DeviceState *dev, Error **errp)
{
    if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
        spapr_memory_plug(hotplug_dev, dev, errp);
    } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) {
        spapr_core_plug(hotplug_dev, dev, errp);
    } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_PCI_HOST_BRIDGE)) {
        spapr_phb_plug(hotplug_dev, dev, errp);
    }
}
4000
/*
 * Machine-level HotplugHandler .unplug hook: dispatch to the per-device
 * unplug-completion routine based on the device's QOM type.
 */
static void spapr_machine_device_unplug(HotplugHandler *hotplug_dev,
                                        DeviceState *dev, Error **errp)
{
    if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
        spapr_memory_unplug(hotplug_dev, dev);
    } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) {
        spapr_core_unplug(hotplug_dev, dev);
    } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_PCI_HOST_BRIDGE)) {
        spapr_phb_unplug(hotplug_dev, dev);
    }
}
4012
/*
 * Machine-level HotplugHandler .unplug_request hook: validate that the
 * machine/guest supports unplugging the given device type, then forward
 * to the per-device request routine.
 */
static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev,
                                                DeviceState *dev, Error **errp)
{
    sPAPRMachineState *sms = SPAPR_MACHINE(OBJECT(hotplug_dev));
    MachineClass *mc = MACHINE_GET_CLASS(sms);
    sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc);

    if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
        /* Memory unplug needs the guest to have negotiated OV5_HP_EVT. */
        if (spapr_ovec_test(sms->ov5_cas, OV5_HP_EVT)) {
            spapr_memory_unplug_request(hotplug_dev, dev, errp);
        } else {
            /* NOTE: this means there is a window after guest reset, prior to
             * CAS negotiation, where unplug requests will fail due to the
             * capability not being detected yet. This is a bit different than
             * the case with PCI unplug, where the events will be queued and
             * eventually handled by the guest after boot
             */
            error_setg(errp, "Memory hot unplug not supported for this guest");
        }
    } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) {
        if (!mc->has_hotpluggable_cpus) {
            error_setg(errp, "CPU hot unplug not supported on this machine");
            return;
        }
        spapr_core_unplug_request(hotplug_dev, dev, errp);
    } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_PCI_HOST_BRIDGE)) {
        if (!smc->dr_phb_enabled) {
            error_setg(errp, "PHB hot unplug not supported on this machine");
            return;
        }
        spapr_phb_unplug_request(hotplug_dev, dev, errp);
    }
}
4046
/*
 * Machine-level HotplugHandler .pre_plug hook: dispatch to the
 * per-device validation routine based on the device's QOM type.
 */
static void spapr_machine_device_pre_plug(HotplugHandler *hotplug_dev,
                                          DeviceState *dev, Error **errp)
{
    if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
        spapr_memory_pre_plug(hotplug_dev, dev, errp);
    } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) {
        spapr_core_pre_plug(hotplug_dev, dev, errp);
    } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_PCI_HOST_BRIDGE)) {
        spapr_phb_pre_plug(hotplug_dev, dev, errp);
    }
}
4058
/*
 * MachineClass .get_hotplug_handler: the machine itself handles
 * (cold/hot)plug for DIMMs, CPU cores and PHBs; all other devices
 * fall back to the default handler (NULL return).
 */
static HotplugHandler *spapr_get_hotplug_handler(MachineState *machine,
                                                 DeviceState *dev)
{
    if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) ||
        object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE) ||
        object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_PCI_HOST_BRIDGE)) {
        return HOTPLUG_HANDLER(machine);
    }
    return NULL;
}
4069
/*
 * MachineClass .cpu_index_to_instance_props: map a flat cpu_index to
 * the instance properties (core-id, NUMA node, ...) of the core slot
 * that contains the corresponding thread.
 */
static CpuInstanceProperties
spapr_cpu_index_to_props(MachineState *machine, unsigned cpu_index)
{
    CPUArchId *core_slot;
    MachineClass *mc = MACHINE_GET_CLASS(machine);

    /* make sure possible_cpus are initialized */
    mc->possible_cpu_arch_ids(machine);
    /* get CPU core slot containing thread that matches cpu_index */
    core_slot = spapr_find_cpu_slot(machine, cpu_index, NULL);
    assert(core_slot);
    return core_slot->props;
}
4083
/*
 * MachineClass .get_default_cpu_node_id: distribute cores round-robin
 * across NUMA nodes, one node per group of smp_cores cores.
 * NOTE(review): divides by nb_numa_nodes — presumably only called when
 * NUMA is configured (nb_numa_nodes > 0); confirm against callers.
 */
static int64_t spapr_get_default_cpu_node_id(const MachineState *ms, int idx)
{
    return idx / smp_cores % nb_numa_nodes;
}
4088
/*
 * MachineClass .possible_cpu_arch_ids: build (once, then cache in
 * machine->possible_cpus) the table of all CPU core slots this machine
 * can ever hold.  One entry per core; arch_id is the core's first
 * thread id (core_id).
 */
static const CPUArchIdList *spapr_possible_cpu_arch_ids(MachineState *machine)
{
    int i;
    const char *core_type;
    int spapr_max_cores = max_cpus / smp_threads;
    MachineClass *mc = MACHINE_GET_CLASS(machine);

    if (!mc->has_hotpluggable_cpus) {
        /* No hotplug: only the boot-time cores exist (rounded up). */
        spapr_max_cores = QEMU_ALIGN_UP(smp_cpus, smp_threads) / smp_threads;
    }
    if (machine->possible_cpus) {
        /* Already built — the table must not change size afterwards. */
        assert(machine->possible_cpus->len == spapr_max_cores);
        return machine->possible_cpus;
    }

    core_type = spapr_get_cpu_core_type(machine->cpu_type);
    if (!core_type) {
        error_report("Unable to find sPAPR CPU Core definition");
        exit(1);
    }

    machine->possible_cpus = g_malloc0(sizeof(CPUArchIdList) +
                             sizeof(CPUArchId) * spapr_max_cores);
    machine->possible_cpus->len = spapr_max_cores;
    for (i = 0; i < machine->possible_cpus->len; i++) {
        int core_id = i * smp_threads;

        machine->possible_cpus->cpus[i].type = core_type;
        machine->possible_cpus->cpus[i].vcpus_count = smp_threads;
        machine->possible_cpus->cpus[i].arch_id = core_id;
        machine->possible_cpus->cpus[i].props.has_core_id = true;
        machine->possible_cpus->cpus[i].props.core_id = core_id;
    }
    return machine->possible_cpus;
}
4124
/*
 * Default (new-style) PHB placement callback: compute the BUID, PIO
 * window, 32-bit and 64-bit MMIO windows and LIOBNs for the PHB with
 * the given @index.  Fails via @errp if @index is out of range.
 */
static void spapr_phb_placement(sPAPRMachineState *spapr, uint32_t index,
                                uint64_t *buid, hwaddr *pio,
                                hwaddr *mmio32, hwaddr *mmio64,
                                unsigned n_dma, uint32_t *liobns, Error **errp)
{
    /*
     * New-style PHB window placement.
     *
     * Goals: Gives large (1TiB), naturally aligned 64-bit MMIO window
     * for each PHB, in addition to 2GiB 32-bit MMIO and 64kiB PIO
     * windows.
     *
     * Some guest kernels can't work with MMIO windows above 1<<46
     * (64TiB), so we place up to 31 PHBs in the area 32TiB..64TiB
     *
     * 32TiB..(33TiB+1984kiB) contains the 64kiB PIO windows for each
     * PHB stacked together. (32TiB+2GiB)..(32TiB+64GiB) contains the
     * 2GiB 32-bit MMIO windows for each PHB. Then 33..64TiB has the
     * 1TiB 64-bit MMIO windows for each PHB.
     */
    const uint64_t base_buid = 0x800000020000000ULL;
    int i;

    /* Sanity check natural alignments */
    QEMU_BUILD_BUG_ON((SPAPR_PCI_BASE % SPAPR_PCI_MEM64_WIN_SIZE) != 0);
    QEMU_BUILD_BUG_ON((SPAPR_PCI_LIMIT % SPAPR_PCI_MEM64_WIN_SIZE) != 0);
    QEMU_BUILD_BUG_ON((SPAPR_PCI_MEM64_WIN_SIZE % SPAPR_PCI_MEM32_WIN_SIZE) != 0);
    QEMU_BUILD_BUG_ON((SPAPR_PCI_MEM32_WIN_SIZE % SPAPR_PCI_IO_WIN_SIZE) != 0);
    /* Sanity check bounds */
    QEMU_BUILD_BUG_ON((SPAPR_MAX_PHBS * SPAPR_PCI_IO_WIN_SIZE) >
                      SPAPR_PCI_MEM32_WIN_SIZE);
    QEMU_BUILD_BUG_ON((SPAPR_MAX_PHBS * SPAPR_PCI_MEM32_WIN_SIZE) >
                      SPAPR_PCI_MEM64_WIN_SIZE);

    if (index >= SPAPR_MAX_PHBS) {
        error_setg(errp, "\"index\" for PAPR PHB is too large (max %llu)",
                   SPAPR_MAX_PHBS - 1);
        return;
    }

    *buid = base_buid + index;
    for (i = 0; i < n_dma; ++i) {
        liobns[i] = SPAPR_PCI_LIOBN(index, i);
    }

    /* Windows are stacked by index within their respective regions. */
    *pio = SPAPR_PCI_BASE + index * SPAPR_PCI_IO_WIN_SIZE;
    *mmio32 = SPAPR_PCI_BASE + (index + 1) * SPAPR_PCI_MEM32_WIN_SIZE;
    *mmio64 = SPAPR_PCI_BASE + (index + 1) * SPAPR_PCI_MEM64_WIN_SIZE;
}
4174
7844e12b
CLG
4175static ICSState *spapr_ics_get(XICSFabric *dev, int irq)
4176{
4177 sPAPRMachineState *spapr = SPAPR_MACHINE(dev);
4178
4179 return ics_valid_irq(spapr->ics, irq) ? spapr->ics : NULL;
4180}
4181
4182static void spapr_ics_resend(XICSFabric *dev)
4183{
4184 sPAPRMachineState *spapr = SPAPR_MACHINE(dev);
4185
4186 ics_resend(spapr->ics);
4187}
4188
81210c20 4189static ICPState *spapr_icp_get(XICSFabric *xi, int vcpu_id)
b2fc59aa 4190{
2e886fb3 4191 PowerPCCPU *cpu = spapr_find_cpu(vcpu_id);
b2fc59aa 4192
a28b9a5a 4193 return cpu ? spapr_cpu_state(cpu)->icp : NULL;
b2fc59aa
CLG
4194}
4195
/*
 * InterruptStatsProvider .print_info: delegate to the active interrupt
 * controller backend's monitor print routine.
 */
static void spapr_pic_print_info(InterruptStatsProvider *obj,
                                 Monitor *mon)
{
    sPAPRMachineState *spapr = SPAPR_MACHINE(obj);

    spapr->irq->print_info(spapr, mon);
}
4203
/* Return the PAPR vCPU id assigned to @cpu (set by spapr_set_vcpu_id). */
int spapr_get_vcpu_id(PowerPCCPU *cpu)
{
    return cpu->vcpu_id;
}
4208
/*
 * Compute and assign the PAPR vCPU id for the CPU at @cpu_index.
 * Under KVM the id must also be acceptable to the kernel; otherwise
 * fail with a hint about reducing cpus or raising threads-per-core.
 */
void spapr_set_vcpu_id(PowerPCCPU *cpu, int cpu_index, Error **errp)
{
    sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
    int vcpu_id;

    vcpu_id = spapr_vcpu_id(spapr, cpu_index);

    if (kvm_enabled() && !kvm_vcpu_id_is_valid(vcpu_id)) {
        error_setg(errp, "Can't create CPU with id %d in KVM", vcpu_id);
        error_append_hint(errp, "Adjust the number of cpus to %d "
                          "or try to raise the number of threads per core\n",
                          vcpu_id * smp_threads / spapr->vsmt);
        return;
    }

    cpu->vcpu_id = vcpu_id;
}
4226
/*
 * Linear scan over all CPUs for the one whose PAPR vCPU id matches
 * @vcpu_id.  Returns NULL if no such vCPU exists.
 */
PowerPCCPU *spapr_find_cpu(int vcpu_id)
{
    CPUState *cs;

    CPU_FOREACH(cs) {
        PowerPCCPU *cpu = POWERPC_CPU(cs);

        if (spapr_get_vcpu_id(cpu) == vcpu_id) {
            return cpu;
        }
    }

    return NULL;
}
4241
/*
 * Class init for the abstract TYPE_SPAPR_MACHINE: wires up every
 * interface the machine implements (hotplug handler, FW path provider,
 * NMI, virtual hypervisor, XICS fabric, interrupt stats) and sets the
 * defaults for the latest machine version.
 */
static void spapr_machine_class_init(ObjectClass *oc, void *data)
{
    MachineClass *mc = MACHINE_CLASS(oc);
    sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(oc);
    FWPathProviderClass *fwc = FW_PATH_PROVIDER_CLASS(oc);
    NMIClass *nc = NMI_CLASS(oc);
    HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
    PPCVirtualHypervisorClass *vhc = PPC_VIRTUAL_HYPERVISOR_CLASS(oc);
    XICSFabricClass *xic = XICS_FABRIC_CLASS(oc);
    InterruptStatsProviderClass *ispc = INTERRUPT_STATS_PROVIDER_CLASS(oc);

    mc->desc = "pSeries Logical Partition (PAPR compliant)";
    mc->ignore_boot_device_suffixes = true;

    /*
     * We set up the default / latest behaviour here. The class_init
     * functions for the specific versioned machine types can override
     * these details for backwards compatibility
     */
    mc->init = spapr_machine_init;
    mc->reset = spapr_machine_reset;
    mc->block_default_type = IF_SCSI;
    mc->max_cpus = 1024;
    mc->no_parallel = 1;
    mc->default_boot_order = "";
    mc->default_ram_size = 512 * MiB;
    mc->default_display = "std";
    mc->kvm_type = spapr_kvm_type;
    machine_class_allow_dynamic_sysbus_dev(mc, TYPE_SPAPR_PCI_HOST_BRIDGE);
    mc->pci_allow_0_address = true;
    assert(!mc->get_hotplug_handler);
    mc->get_hotplug_handler = spapr_get_hotplug_handler;
    hc->pre_plug = spapr_machine_device_pre_plug;
    hc->plug = spapr_machine_device_plug;
    mc->cpu_index_to_instance_props = spapr_cpu_index_to_props;
    mc->get_default_cpu_node_id = spapr_get_default_cpu_node_id;
    mc->possible_cpu_arch_ids = spapr_possible_cpu_arch_ids;
    hc->unplug_request = spapr_machine_device_unplug_request;
    hc->unplug = spapr_machine_device_unplug;

    smc->dr_lmb_enabled = true;
    smc->update_dt_enabled = true;
    mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power9_v2.0");
    mc->has_hotpluggable_cpus = true;
    smc->resize_hpt_default = SPAPR_RESIZE_HPT_ENABLED;
    fwc->get_dev_path = spapr_get_fw_dev_path;
    nc->nmi_monitor_handler = spapr_nmi;
    smc->phb_placement = spapr_phb_placement;
    vhc->hypercall = emulate_spapr_hypercall;
    vhc->hpt_mask = spapr_hpt_mask;
    vhc->map_hptes = spapr_map_hptes;
    vhc->unmap_hptes = spapr_unmap_hptes;
    vhc->store_hpte = spapr_store_hpte;
    vhc->get_pate = spapr_get_pate;
    vhc->encode_hpt_for_kvm_pr = spapr_encode_hpt_for_kvm_pr;
    xic->ics_get = spapr_ics_get;
    xic->ics_resend = spapr_ics_resend;
    xic->icp_get = spapr_icp_get;
    ispc->print_info = spapr_pic_print_info;
    /* Force NUMA node memory size to be a multiple of
     * SPAPR_MEMORY_BLOCK_SIZE (256M) since that's the granularity
     * in which LMBs are represented and hot-added
     */
    mc->numa_mem_align_shift = 28;

    /* Default spapr capability settings for the latest machine type. */
    smc->default_caps.caps[SPAPR_CAP_HTM] = SPAPR_CAP_OFF;
    smc->default_caps.caps[SPAPR_CAP_VSX] = SPAPR_CAP_ON;
    smc->default_caps.caps[SPAPR_CAP_DFP] = SPAPR_CAP_ON;
    smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_BROKEN;
    smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_BROKEN;
    smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN;
    smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 16; /* 64kiB */
    smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
    smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
    smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
    spapr_caps_add_properties(smc, &error_abort);
    smc->irq = &spapr_irq_xics;
    smc->dr_phb_enabled = true;
}
4321
/* QOM registration for the abstract base sPAPR machine type; versioned
 * machine types derive from it via DEFINE_SPAPR_MACHINE. */
static const TypeInfo spapr_machine_info = {
    .name          = TYPE_SPAPR_MACHINE,
    .parent        = TYPE_MACHINE,
    .abstract      = true,
    .instance_size = sizeof(sPAPRMachineState),
    .instance_init = spapr_instance_init,
    .instance_finalize = spapr_machine_finalizefn,
    .class_size    = sizeof(sPAPRMachineClass),
    .class_init    = spapr_machine_class_init,
    .interfaces = (InterfaceInfo[]) {
        { TYPE_FW_PATH_PROVIDER },
        { TYPE_NMI },
        { TYPE_HOTPLUG_HANDLER },
        { TYPE_PPC_VIRTUAL_HYPERVISOR },
        { TYPE_XICS_FABRIC },
        { TYPE_INTERRUPT_STATS_PROVIDER },
        { }
    },
};
4341
/*
 * Boilerplate generator for a versioned "pseries-<verstr>" machine
 * type: emits the class_init wrapper (which applies the version's
 * class options and, for the latest version, the "pseries" alias and
 * default-machine flag), the TypeInfo, and the type registration.
 */
#define DEFINE_SPAPR_MACHINE(suffix, verstr, latest)                 \
    static void spapr_machine_##suffix##_class_init(ObjectClass *oc, \
                                                    void *data)      \
    {                                                                \
        MachineClass *mc = MACHINE_CLASS(oc);                        \
        spapr_machine_##suffix##_class_options(mc);                  \
        if (latest) {                                                \
            mc->alias = "pseries";                                   \
            mc->is_default = 1;                                      \
        }                                                            \
    }                                                                \
    static const TypeInfo spapr_machine_##suffix##_info = {          \
        .name = MACHINE_TYPE_NAME("pseries-" verstr),                \
        .parent = TYPE_SPAPR_MACHINE,                                \
        .class_init = spapr_machine_##suffix##_class_init,           \
    };                                                               \
    static void spapr_machine_register_##suffix(void)                \
    {                                                                \
        type_register(&spapr_machine_##suffix##_info);               \
    }                                                                \
    type_init(spapr_machine_register_##suffix)
5013c547 4363
/*
 * pseries-4.0
 */
static void spapr_machine_4_0_class_options(MachineClass *mc)
{
    /* Defaults for the latest behaviour inherited from the base class */
}

DEFINE_SPAPR_MACHINE(4_0, "4.0", true);
4373
/*
 * pseries-3.1
 */
static void spapr_machine_3_1_class_options(MachineClass *mc)
{
    sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
    static GlobalProperty compat[] = {
        { TYPE_SPAPR_MACHINE, "host-model", "passthrough" },
        { TYPE_SPAPR_MACHINE, "host-serial", "passthrough" },
    };

    /* Inherit 4.0 behaviour, then roll back 4.0-only changes. */
    spapr_machine_4_0_class_options(mc);
    compat_props_add(mc->compat_props, hw_compat_3_1, hw_compat_3_1_len);
    compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));

    mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power8_v2.0");
    smc->update_dt_enabled = false;
    smc->dr_phb_enabled = false;
    smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_OFF;
}

DEFINE_SPAPR_MACHINE(3_1, "3.1", false);
d45360d9 4396
/*
 * pseries-3.0
 */

static void spapr_machine_3_0_class_options(MachineClass *mc)
{
    sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc);

    /* Inherit 3.1 behaviour, then restore the legacy IRQ backend. */
    spapr_machine_3_1_class_options(mc);
    compat_props_add(mc->compat_props, hw_compat_3_0, hw_compat_3_0_len);

    smc->legacy_irq_allocation = true;
    smc->irq = &spapr_irq_xics_legacy;
}

DEFINE_SPAPR_MACHINE(3_0, "3.0", false);
8a4fd427 4413
/*
 * pseries-2.12
 */
static void spapr_machine_2_12_class_options(MachineClass *mc)
{
    sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
    static GlobalProperty compat[] = {
        { TYPE_POWERPC_CPU, "pre-3.0-migration", "on" },
        { TYPE_SPAPR_CPU_CORE, "pre-3.0-migration", "on" },
    };

    spapr_machine_3_0_class_options(mc);
    compat_props_add(mc->compat_props, hw_compat_2_12, hw_compat_2_12_len);
    compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));

    /* We depend on kvm_enabled() to choose a default value for the
     * hpt-max-page-size capability. Of course we can't do it here
     * because this is too early and the HW accelerator isn't initialized
     * yet. Postpone this to machine init (see default_caps_with_cpu()).
     */
    smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 0;
}

DEFINE_SPAPR_MACHINE(2_12, "2.12", false);
2b615412 4438
/* pseries-2.12-sxxm: 2.12 variant with the Spectre/Meltdown mitigation
 * capabilities defaulted to their mitigated settings. */
static void spapr_machine_2_12_sxxm_class_options(MachineClass *mc)
{
    sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc);

    spapr_machine_2_12_class_options(mc);
    smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND;
    smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND;
    smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_FIXED_CCD;
}

DEFINE_SPAPR_MACHINE(2_12_sxxm, "2.12-sxxm", false);
4450
/*
 * pseries-2.11
 */

static void spapr_machine_2_11_class_options(MachineClass *mc)
{
    sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc);

    spapr_machine_2_12_class_options(mc);
    /* 2.11 still defaulted the HTM capability to on. */
    smc->default_caps.caps[SPAPR_CAP_HTM] = SPAPR_CAP_ON;
    compat_props_add(mc->compat_props, hw_compat_2_11, hw_compat_2_11_len);
}

DEFINE_SPAPR_MACHINE(2_11, "2.11", false);
e2676b16 4465
/*
 * pseries-2.10
 */

static void spapr_machine_2_10_class_options(MachineClass *mc)
{
    spapr_machine_2_11_class_options(mc);
    compat_props_add(mc->compat_props, hw_compat_2_10, hw_compat_2_10_len);
}

DEFINE_SPAPR_MACHINE(2_10, "2.10", false);
3fa14fbe 4477
/*
 * pseries-2.9
 */

static void spapr_machine_2_9_class_options(MachineClass *mc)
{
    sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
    static GlobalProperty compat[] = {
        { TYPE_POWERPC_CPU, "pre-2.10-migration", "on" },
    };

    spapr_machine_2_10_class_options(mc);
    compat_props_add(mc->compat_props, hw_compat_2_9, hw_compat_2_9_len);
    compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
    mc->numa_auto_assign_ram = numa_legacy_auto_assign_ram;
    /* 2.9 and older migrate per-thread ICP state even for absent CPUs. */
    smc->pre_2_10_has_unused_icps = true;
    smc->resize_hpt_default = SPAPR_RESIZE_HPT_DISABLED;
}

DEFINE_SPAPR_MACHINE(2_9, "2.9", false);
fa325e6c 4498
/*
 * pseries-2.8
 */

static void spapr_machine_2_8_class_options(MachineClass *mc)
{
    static GlobalProperty compat[] = {
        { TYPE_SPAPR_PCI_HOST_BRIDGE, "pcie-extended-configuration-space", "off" },
    };

    spapr_machine_2_9_class_options(mc);
    compat_props_add(mc->compat_props, hw_compat_2_8, hw_compat_2_8_len);
    compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
    mc->numa_mem_align_shift = 23;
}

DEFINE_SPAPR_MACHINE(2_8, "2.8", false);
db800b21 4516
/*
 * pseries-2.7
 */

/*
 * Legacy PHB placement for pseries-2.7 and earlier machine types:
 * PHB windows are stacked in 64GiB strides above RAM (and device
 * memory), with PIO/MMIO carved out of each stride at fixed offsets.
 */
static void phb_placement_2_7(sPAPRMachineState *spapr, uint32_t index,
                              uint64_t *buid, hwaddr *pio,
                              hwaddr *mmio32, hwaddr *mmio64,
                              unsigned n_dma, uint32_t *liobns, Error **errp)
{
    /* Legacy PHB placement for pseries-2.7 and earlier machine types */
    const uint64_t base_buid = 0x800000020000000ULL;
    const hwaddr phb_spacing = 0x1000000000ULL; /* 64 GiB */
    const hwaddr mmio_offset = 0xa0000000; /* 2 GiB + 512 MiB */
    const hwaddr pio_offset = 0x80000000; /* 2 GiB */
    const uint32_t max_index = 255;
    const hwaddr phb0_alignment = 0x10000000000ULL; /* 1 TiB */

    uint64_t ram_top = MACHINE(spapr)->ram_size;
    hwaddr phb0_base, phb_base;
    int i;

    /* Do we have device memory? */
    if (MACHINE(spapr)->maxram_size > ram_top) {
        /* Can't just use maxram_size, because there may be an
         * alignment gap between normal and device memory regions
         */
        ram_top = MACHINE(spapr)->device_memory->base +
            memory_region_size(&MACHINE(spapr)->device_memory->mr);
    }

    phb0_base = QEMU_ALIGN_UP(ram_top, phb0_alignment);

    if (index > max_index) {
        error_setg(errp, "\"index\" for PAPR PHB is too large (max %u)",
                   max_index);
        return;
    }

    *buid = base_buid + index;
    for (i = 0; i < n_dma; ++i) {
        liobns[i] = SPAPR_PCI_LIOBN(index, i);
    }

    phb_base = phb0_base + index * phb_spacing;
    *pio = phb_base + pio_offset;
    *mmio32 = phb_base + mmio_offset;
    /*
     * We don't set the 64-bit MMIO window, relying on the PHB's
     * fallback behaviour of automatically splitting a large "32-bit"
     * window into contiguous 32-bit and 64-bit windows
     */
}
db800b21 4569
static void spapr_machine_2_7_class_options(MachineClass *mc)
{
    sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
    static GlobalProperty compat[] = {
        { TYPE_SPAPR_PCI_HOST_BRIDGE, "mem_win_size", "0xf80000000", },
        { TYPE_SPAPR_PCI_HOST_BRIDGE, "mem64_win_size", "0", },
        { TYPE_POWERPC_CPU, "pre-2.8-migration", "on", },
        { TYPE_SPAPR_PCI_HOST_BRIDGE, "pre-2.8-migration", "on", },
    };

    spapr_machine_2_8_class_options(mc);
    mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power7_v2.3");
    mc->default_machine_opts = "modern-hotplug-events=off";
    compat_props_add(mc->compat_props, hw_compat_2_7, hw_compat_2_7_len);
    compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
    /* 2.7 and older use the legacy stacked-above-RAM PHB layout. */
    smc->phb_placement = phb_placement_2_7;
}

DEFINE_SPAPR_MACHINE(2_7, "2.7", false);
1ea1eefc 4589
/*
 * pseries-2.6
 */

static void spapr_machine_2_6_class_options(MachineClass *mc)
{
    static GlobalProperty compat[] = {
        { TYPE_SPAPR_PCI_HOST_BRIDGE, "ddw", "off" },
    };

    spapr_machine_2_7_class_options(mc);
    /* CPU hotplug was introduced after 2.6. */
    mc->has_hotpluggable_cpus = false;
    compat_props_add(mc->compat_props, hw_compat_2_6, hw_compat_2_6_len);
    compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
}

DEFINE_SPAPR_MACHINE(2_6, "2.6", false);
4b23699c 4607
1c5f29bb
DG
4608/*
4609 * pseries-2.5
4610 */
4b23699c 4611
5013c547
DG
4612static void spapr_machine_2_5_class_options(MachineClass *mc)
4613{
57040d45 4614 sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
88cbe073 4615 static GlobalProperty compat[] = {
6c36bddf 4616 { "spapr-vlan", "use-rx-buffer-pools", "off" },
88cbe073 4617 };
57040d45 4618
4b23699c 4619 spapr_machine_2_6_class_options(mc);
57040d45 4620 smc->use_ohci_by_default = true;
fe759610 4621 compat_props_add(mc->compat_props, hw_compat_2_5, hw_compat_2_5_len);
88cbe073 4622 compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
1c5f29bb
DG
4623}
4624
4b23699c 4625DEFINE_SPAPR_MACHINE(2_5, "2.5", false);
1c5f29bb
DG
4626
4627/*
4628 * pseries-2.4
4629 */
80fd50f9 4630
5013c547
DG
4631static void spapr_machine_2_4_class_options(MachineClass *mc)
4632{
fc9f38c3
DG
4633 sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
4634
4635 spapr_machine_2_5_class_options(mc);
fc9f38c3 4636 smc->dr_lmb_enabled = false;
2f99b9c2 4637 compat_props_add(mc->compat_props, hw_compat_2_4, hw_compat_2_4_len);
1c5f29bb
DG
4638}
4639
fccbc785 4640DEFINE_SPAPR_MACHINE(2_4, "2.4", false);
1c5f29bb
DG
4641
4642/*
4643 * pseries-2.3
4644 */
38ff32c6 4645
5013c547 4646static void spapr_machine_2_3_class_options(MachineClass *mc)
6026db45 4647{
88cbe073 4648 static GlobalProperty compat[] = {
6c36bddf 4649 { "spapr-pci-host-bridge", "dynamic-reconfiguration", "off" },
88cbe073 4650 };
fc9f38c3 4651 spapr_machine_2_4_class_options(mc);
8995dd90 4652 compat_props_add(mc->compat_props, hw_compat_2_3, hw_compat_2_3_len);
88cbe073 4653 compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
6026db45 4654}
fccbc785 4655DEFINE_SPAPR_MACHINE(2_3, "2.3", false);
6026db45 4656
1c5f29bb
DG
4657/*
4658 * pseries-2.2
4659 */
1c5f29bb 4660
5013c547 4661static void spapr_machine_2_2_class_options(MachineClass *mc)
4aee7362 4662{
88cbe073 4663 static GlobalProperty compat[] = {
6c36bddf 4664 { TYPE_SPAPR_PCI_HOST_BRIDGE, "mem_win_size", "0x20000000" },
88cbe073
MAL
4665 };
4666
fc9f38c3 4667 spapr_machine_2_3_class_options(mc);
1c30044e 4668 compat_props_add(mc->compat_props, hw_compat_2_2, hw_compat_2_2_len);
88cbe073 4669 compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
f6d0656b 4670 mc->default_machine_opts = "modern-hotplug-events=off,suppress-vmdesc=on";
4aee7362 4671}
fccbc785 4672DEFINE_SPAPR_MACHINE(2_2, "2.2", false);
4aee7362 4673
1c5f29bb
DG
4674/*
4675 * pseries-2.1
4676 */
3dab0244 4677
5013c547 4678static void spapr_machine_2_1_class_options(MachineClass *mc)
d25228e7 4679{
fc9f38c3 4680 spapr_machine_2_2_class_options(mc);
c4fc5695 4681 compat_props_add(mc->compat_props, hw_compat_2_1, hw_compat_2_1_len);
d25228e7 4682}
fccbc785 4683DEFINE_SPAPR_MACHINE(2_1, "2.1", false);
fb0fc8f6 4684
29ee3247 4685static void spapr_machine_register_types(void)
9fdf0c29 4686{
29ee3247 4687 type_register_static(&spapr_machine_info);
9fdf0c29
DG
4688}
4689
29ee3247 4690type_init(spapr_machine_register_types)