]> git.proxmox.com Git - mirror_qemu.git/blame - hw/ppc/spapr.c
machine: add default_ram_size to machine class
[mirror_qemu.git] / hw / ppc / spapr.c
CommitLineData
9fdf0c29
DG
1/*
2 * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator
3 *
4 * Copyright (c) 2004-2007 Fabrice Bellard
5 * Copyright (c) 2007 Jocelyn Mayer
6 * Copyright (c) 2010 David Gibson, IBM Corporation.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a copy
9 * of this software and associated documentation files (the "Software"), to deal
10 * in the Software without restriction, including without limitation the rights
11 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 * copies of the Software, and to permit persons to whom the Software is
13 * furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included in
16 * all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 * THE SOFTWARE.
25 *
26 */
9c17d615 27#include "sysemu/sysemu.h"
e35704ba 28#include "sysemu/numa.h"
83c9f4ca 29#include "hw/hw.h"
71461b0f 30#include "hw/fw-path-provider.h"
9fdf0c29 31#include "elf.h"
1422e32d 32#include "net/net.h"
fa1d36df 33#include "sysemu/block-backend.h"
9c17d615
PB
34#include "sysemu/cpus.h"
35#include "sysemu/kvm.h"
e97c3636 36#include "kvm_ppc.h"
4be21d56 37#include "mmu-hash64.h"
3794d548 38#include "qom/cpu.h"
9fdf0c29
DG
39
40#include "hw/boards.h"
0d09e41a 41#include "hw/ppc/ppc.h"
9fdf0c29
DG
42#include "hw/loader.h"
43
0d09e41a
PB
44#include "hw/ppc/spapr.h"
45#include "hw/ppc/spapr_vio.h"
46#include "hw/pci-host/spapr.h"
47#include "hw/ppc/xics.h"
a2cb15b0 48#include "hw/pci/msi.h"
9fdf0c29 49
83c9f4ca 50#include "hw/pci/pci.h"
71461b0f
AK
51#include "hw/scsi/scsi.h"
52#include "hw/virtio/virtio-scsi.h"
f61b4bed 53
022c62cb 54#include "exec/address-spaces.h"
35139a59 55#include "hw/usb.h"
1de7afc9 56#include "qemu/config-file.h"
135a129a 57#include "qemu/error-report.h"
2a6593cb 58#include "trace.h"
34316482 59#include "hw/nmi.h"
890c2b77 60
68a27b20
MT
61#include "hw/compat.h"
62
9fdf0c29
DG
63#include <libfdt.h>
64
4d8d5467
BH
65/* SLOF memory layout:
66 *
67 * SLOF raw image loaded at 0, copies its romfs right below the flat
68 * device-tree, then position SLOF itself 31M below that
69 *
70 * So we set FW_OVERHEAD to 40MB which should account for all of that
71 * and more
72 *
73 * We load our kernel at 4M, leaving space for SLOF initial image
74 */
3bf6eedd 75#define FDT_MAX_SIZE 0x40000
39ac8455 76#define RTAS_MAX_SIZE 0x10000
b7d1f77a 77#define RTAS_MAX_ADDR 0x80000000 /* RTAS must stay below that */
a9f8ad8f
DG
78#define FW_MAX_SIZE 0x400000
79#define FW_FILE_NAME "slof.bin"
4d8d5467
BH
80#define FW_OVERHEAD 0x2800000
81#define KERNEL_LOAD_ADDR FW_MAX_SIZE
a9f8ad8f 82
4d8d5467 83#define MIN_RMA_SLOF 128UL
9fdf0c29
DG
84
85#define TIMEBASE_FREQ 512000000ULL
86
9674a356 87#define MAX_CPUS 255
9fdf0c29 88
0c103f8e
DG
89#define PHANDLE_XICP 0x00001111
90
7f763a5d
DG
91#define HTAB_SIZE(spapr) (1ULL << ((spapr)->htab_shift))
92
6ca1502e 93typedef struct sPAPRMachineState sPAPRMachineState;
748abce9 94
29ee3247 95#define TYPE_SPAPR_MACHINE "spapr-machine"
748abce9 96#define SPAPR_MACHINE(obj) \
6ca1502e 97 OBJECT_CHECK(sPAPRMachineState, (obj), TYPE_SPAPR_MACHINE)
748abce9
EH
98
99/**
6ca1502e 100 * sPAPRMachineState:
748abce9 101 */
6ca1502e 102struct sPAPRMachineState {
748abce9
EH
103 /*< private >*/
104 MachineState parent_obj;
23825581
EH
105
106 /*< public >*/
107 char *kvm_type;
748abce9
EH
108};
109
9fdf0c29
DG
110sPAPREnvironment *spapr;
111
c04d6cfa 112static XICSState *try_create_xics(const char *type, int nr_servers,
34f2af3d 113 int nr_irqs, Error **errp)
c04d6cfa 114{
34f2af3d 115 Error *err = NULL;
c04d6cfa
AL
116 DeviceState *dev;
117
118 dev = qdev_create(NULL, type);
119 qdev_prop_set_uint32(dev, "nr_servers", nr_servers);
120 qdev_prop_set_uint32(dev, "nr_irqs", nr_irqs);
34f2af3d
MA
121 object_property_set_bool(OBJECT(dev), true, "realized", &err);
122 if (err) {
123 error_propagate(errp, err);
124 object_unparent(OBJECT(dev));
c04d6cfa
AL
125 return NULL;
126 }
5a3d7b23 127 return XICS_COMMON(dev);
c04d6cfa
AL
128}
129
446f16a6
MA
130static XICSState *xics_system_init(MachineState *machine,
131 int nr_servers, int nr_irqs)
c04d6cfa
AL
132{
133 XICSState *icp = NULL;
134
11ad93f6 135 if (kvm_enabled()) {
34f2af3d
MA
136 Error *err = NULL;
137
446f16a6 138 if (machine_kernel_irqchip_allowed(machine)) {
34f2af3d 139 icp = try_create_xics(TYPE_KVM_XICS, nr_servers, nr_irqs, &err);
11ad93f6 140 }
446f16a6 141 if (machine_kernel_irqchip_required(machine) && !icp) {
34f2af3d
MA
142 error_report("kernel_irqchip requested but unavailable: %s",
143 error_get_pretty(err));
11ad93f6
DG
144 }
145 }
146
147 if (!icp) {
34f2af3d 148 icp = try_create_xics(TYPE_XICS, nr_servers, nr_irqs, &error_abort);
c04d6cfa
AL
149 }
150
151 return icp;
152}
153
833d4668
AK
154static int spapr_fixup_cpu_smt_dt(void *fdt, int offset, PowerPCCPU *cpu,
155 int smt_threads)
156{
157 int i, ret = 0;
158 uint32_t servers_prop[smt_threads];
159 uint32_t gservers_prop[smt_threads * 2];
160 int index = ppc_get_vcpu_dt_id(cpu);
161
6d9412ea 162 if (cpu->cpu_version) {
4bce526e 163 ret = fdt_setprop_cell(fdt, offset, "cpu-version", cpu->cpu_version);
6d9412ea
AK
164 if (ret < 0) {
165 return ret;
166 }
167 }
168
833d4668
AK
169 /* Build interrupt servers and gservers properties */
170 for (i = 0; i < smt_threads; i++) {
171 servers_prop[i] = cpu_to_be32(index + i);
172 /* Hack, direct the group queues back to cpu 0 */
173 gservers_prop[i*2] = cpu_to_be32(index + i);
174 gservers_prop[i*2 + 1] = 0;
175 }
176 ret = fdt_setprop(fdt, offset, "ibm,ppc-interrupt-server#s",
177 servers_prop, sizeof(servers_prop));
178 if (ret < 0) {
179 return ret;
180 }
181 ret = fdt_setprop(fdt, offset, "ibm,ppc-interrupt-gserver#s",
182 gservers_prop, sizeof(gservers_prop));
183
184 return ret;
185}
186
7f763a5d 187static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr)
6e806cc3 188{
82677ed2
AK
189 int ret = 0, offset, cpus_offset;
190 CPUState *cs;
6e806cc3
BR
191 char cpu_model[32];
192 int smt = kvmppc_smt_threads();
7f763a5d 193 uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
6e806cc3 194
82677ed2
AK
195 CPU_FOREACH(cs) {
196 PowerPCCPU *cpu = POWERPC_CPU(cs);
197 DeviceClass *dc = DEVICE_GET_CLASS(cs);
198 int index = ppc_get_vcpu_dt_id(cpu);
6e806cc3
BR
199 uint32_t associativity[] = {cpu_to_be32(0x5),
200 cpu_to_be32(0x0),
201 cpu_to_be32(0x0),
202 cpu_to_be32(0x0),
82677ed2 203 cpu_to_be32(cs->numa_node),
0f20ba62 204 cpu_to_be32(index)};
6e806cc3 205
0f20ba62 206 if ((index % smt) != 0) {
6e806cc3
BR
207 continue;
208 }
209
82677ed2 210 snprintf(cpu_model, 32, "%s@%x", dc->fw_name, index);
6e806cc3 211
82677ed2
AK
212 cpus_offset = fdt_path_offset(fdt, "/cpus");
213 if (cpus_offset < 0) {
214 cpus_offset = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"),
215 "cpus");
216 if (cpus_offset < 0) {
217 return cpus_offset;
218 }
219 }
220 offset = fdt_subnode_offset(fdt, cpus_offset, cpu_model);
6e806cc3 221 if (offset < 0) {
82677ed2
AK
222 offset = fdt_add_subnode(fdt, cpus_offset, cpu_model);
223 if (offset < 0) {
224 return offset;
225 }
6e806cc3
BR
226 }
227
7f763a5d
DG
228 if (nb_numa_nodes > 1) {
229 ret = fdt_setprop(fdt, offset, "ibm,associativity", associativity,
230 sizeof(associativity));
231 if (ret < 0) {
232 return ret;
233 }
234 }
235
236 ret = fdt_setprop(fdt, offset, "ibm,pft-size",
237 pft_size_prop, sizeof(pft_size_prop));
6e806cc3
BR
238 if (ret < 0) {
239 return ret;
240 }
833d4668 241
82677ed2 242 ret = spapr_fixup_cpu_smt_dt(fdt, offset, cpu,
2a48d993 243 ppc_get_compat_smt_threads(cpu));
833d4668
AK
244 if (ret < 0) {
245 return ret;
246 }
6e806cc3
BR
247 }
248 return ret;
249}
250
5af9873d
BH
251
252static size_t create_page_sizes_prop(CPUPPCState *env, uint32_t *prop,
253 size_t maxsize)
254{
255 size_t maxcells = maxsize / sizeof(uint32_t);
256 int i, j, count;
257 uint32_t *p = prop;
258
259 for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
260 struct ppc_one_seg_page_size *sps = &env->sps.sps[i];
261
262 if (!sps->page_shift) {
263 break;
264 }
265 for (count = 0; count < PPC_PAGE_SIZES_MAX_SZ; count++) {
266 if (sps->enc[count].page_shift == 0) {
267 break;
268 }
269 }
270 if ((p - prop) >= (maxcells - 3 - count * 2)) {
271 break;
272 }
273 *(p++) = cpu_to_be32(sps->page_shift);
274 *(p++) = cpu_to_be32(sps->slb_enc);
275 *(p++) = cpu_to_be32(count);
276 for (j = 0; j < count; j++) {
277 *(p++) = cpu_to_be32(sps->enc[j].page_shift);
278 *(p++) = cpu_to_be32(sps->enc[j].pte_enc);
279 }
280 }
281
282 return (p - prop) * sizeof(uint32_t);
283}
284
b082d65a
AK
285static hwaddr spapr_node0_size(void)
286{
287 if (nb_numa_nodes) {
288 int i;
289 for (i = 0; i < nb_numa_nodes; ++i) {
290 if (numa_info[i].node_mem) {
291 return MIN(pow2floor(numa_info[i].node_mem), ram_size);
292 }
293 }
294 }
295 return ram_size;
296}
297
7f763a5d
DG
/*
 * Evaluate the libfdt expression @exp once; if it yields a negative
 * value, print the failing expression with its libfdt error string and
 * exit(1).  The temporary has a macro-private name so it cannot shadow a
 * caller's own 'ret' variable.
 */
#define _FDT(exp) \
    do { \
        int fdt_rc_ = (exp); \
        if (fdt_rc_ < 0) { \
            fprintf(stderr, "qemu: error creating device tree: %s: %s\n", \
                    #exp, fdt_strerror(fdt_rc_)); \
            exit(1); \
        } \
    } while (0)
307
a1d59c0f
AK
308static void add_str(GString *s, const gchar *s1)
309{
310 g_string_append_len(s, s1, strlen(s1) + 1);
311}
7f763a5d 312
3bbf37f2 313static void *spapr_create_fdt_skel(hwaddr initrd_base,
a8170e5e
AK
314 hwaddr initrd_size,
315 hwaddr kernel_size,
16457e7f 316 bool little_endian,
74d042e5
DG
317 const char *kernel_cmdline,
318 uint32_t epow_irq)
9fdf0c29
DG
319{
320 void *fdt;
182735ef 321 CPUState *cs;
9fdf0c29
DG
322 uint32_t start_prop = cpu_to_be32(initrd_base);
323 uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
a1d59c0f
AK
324 GString *hypertas = g_string_sized_new(256);
325 GString *qemu_hypertas = g_string_sized_new(256);
7f763a5d 326 uint32_t refpoints[] = {cpu_to_be32(0x4), cpu_to_be32(0x4)};
b5cec4c5 327 uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(smp_cpus)};
833d4668 328 int smt = kvmppc_smt_threads();
6e806cc3 329 unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80};
10582ff8
AK
330 QemuOpts *opts = qemu_opts_find(qemu_find_opts("smp-opts"), NULL);
331 unsigned sockets = opts ? qemu_opt_get_number(opts, "sockets", 0) : 0;
332 uint32_t cpus_per_socket = sockets ? (smp_cpus / sockets) : 1;
ef951443 333 char *buf;
9fdf0c29 334
a1d59c0f
AK
335 add_str(hypertas, "hcall-pft");
336 add_str(hypertas, "hcall-term");
337 add_str(hypertas, "hcall-dabr");
338 add_str(hypertas, "hcall-interrupt");
339 add_str(hypertas, "hcall-tce");
340 add_str(hypertas, "hcall-vio");
341 add_str(hypertas, "hcall-splpar");
342 add_str(hypertas, "hcall-bulk");
343 add_str(hypertas, "hcall-set-mode");
344 add_str(qemu_hypertas, "hcall-memop1");
345
7267c094 346 fdt = g_malloc0(FDT_MAX_SIZE);
9fdf0c29
DG
347 _FDT((fdt_create(fdt, FDT_MAX_SIZE)));
348
4d8d5467
BH
349 if (kernel_size) {
350 _FDT((fdt_add_reservemap_entry(fdt, KERNEL_LOAD_ADDR, kernel_size)));
351 }
352 if (initrd_size) {
353 _FDT((fdt_add_reservemap_entry(fdt, initrd_base, initrd_size)));
354 }
9fdf0c29
DG
355 _FDT((fdt_finish_reservemap(fdt)));
356
357 /* Root node */
358 _FDT((fdt_begin_node(fdt, "")));
359 _FDT((fdt_property_string(fdt, "device_type", "chrp")));
5d73dd66 360 _FDT((fdt_property_string(fdt, "model", "IBM pSeries (emulated by qemu)")));
d63919c9 361 _FDT((fdt_property_string(fdt, "compatible", "qemu,pseries")));
9fdf0c29 362
ef951443
ND
363 /*
364 * Add info to guest to indentify which host is it being run on
365 * and what is the uuid of the guest
366 */
367 if (kvmppc_get_host_model(&buf)) {
368 _FDT((fdt_property_string(fdt, "host-model", buf)));
369 g_free(buf);
370 }
371 if (kvmppc_get_host_serial(&buf)) {
372 _FDT((fdt_property_string(fdt, "host-serial", buf)));
373 g_free(buf);
374 }
375
376 buf = g_strdup_printf(UUID_FMT, qemu_uuid[0], qemu_uuid[1],
377 qemu_uuid[2], qemu_uuid[3], qemu_uuid[4],
378 qemu_uuid[5], qemu_uuid[6], qemu_uuid[7],
379 qemu_uuid[8], qemu_uuid[9], qemu_uuid[10],
380 qemu_uuid[11], qemu_uuid[12], qemu_uuid[13],
381 qemu_uuid[14], qemu_uuid[15]);
382
383 _FDT((fdt_property_string(fdt, "vm,uuid", buf)));
384 g_free(buf);
385
9fdf0c29
DG
386 _FDT((fdt_property_cell(fdt, "#address-cells", 0x2)));
387 _FDT((fdt_property_cell(fdt, "#size-cells", 0x2)));
388
389 /* /chosen */
390 _FDT((fdt_begin_node(fdt, "chosen")));
391
6e806cc3
BR
392 /* Set Form1_affinity */
393 _FDT((fdt_property(fdt, "ibm,architecture-vec-5", vec5, sizeof(vec5))));
394
9fdf0c29
DG
395 _FDT((fdt_property_string(fdt, "bootargs", kernel_cmdline)));
396 _FDT((fdt_property(fdt, "linux,initrd-start",
397 &start_prop, sizeof(start_prop))));
398 _FDT((fdt_property(fdt, "linux,initrd-end",
399 &end_prop, sizeof(end_prop))));
4d8d5467
BH
400 if (kernel_size) {
401 uint64_t kprop[2] = { cpu_to_be64(KERNEL_LOAD_ADDR),
402 cpu_to_be64(kernel_size) };
9fdf0c29 403
4d8d5467 404 _FDT((fdt_property(fdt, "qemu,boot-kernel", &kprop, sizeof(kprop))));
16457e7f
BH
405 if (little_endian) {
406 _FDT((fdt_property(fdt, "qemu,boot-kernel-le", NULL, 0)));
407 }
4d8d5467 408 }
cc84c0f3
AS
409 if (boot_menu) {
410 _FDT((fdt_property_cell(fdt, "qemu,boot-menu", boot_menu)));
411 }
f28359d8
LZ
412 _FDT((fdt_property_cell(fdt, "qemu,graphic-width", graphic_width)));
413 _FDT((fdt_property_cell(fdt, "qemu,graphic-height", graphic_height)));
414 _FDT((fdt_property_cell(fdt, "qemu,graphic-depth", graphic_depth)));
3384f95c 415
9fdf0c29
DG
416 _FDT((fdt_end_node(fdt)));
417
9fdf0c29
DG
418 /* cpus */
419 _FDT((fdt_begin_node(fdt, "cpus")));
420
421 _FDT((fdt_property_cell(fdt, "#address-cells", 0x1)));
422 _FDT((fdt_property_cell(fdt, "#size-cells", 0x0)));
423
bdc44640 424 CPU_FOREACH(cs) {
182735ef
AF
425 PowerPCCPU *cpu = POWERPC_CPU(cs);
426 CPUPPCState *env = &cpu->env;
3bbf37f2 427 DeviceClass *dc = DEVICE_GET_CLASS(cs);
182735ef 428 PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
0f20ba62 429 int index = ppc_get_vcpu_dt_id(cpu);
9fdf0c29
DG
430 char *nodename;
431 uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
432 0xffffffff, 0xffffffff};
0a8b2938
AG
433 uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ;
434 uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000;
5af9873d
BH
435 uint32_t page_sizes_prop[64];
436 size_t page_sizes_prop_size;
9fdf0c29 437
e97c3636
DG
438 if ((index % smt) != 0) {
439 continue;
440 }
441
3bbf37f2 442 nodename = g_strdup_printf("%s@%x", dc->fw_name, index);
9fdf0c29
DG
443
444 _FDT((fdt_begin_node(fdt, nodename)));
445
4ecf8aa5 446 g_free(nodename);
9fdf0c29 447
c7a5c0c9 448 _FDT((fdt_property_cell(fdt, "reg", index)));
9fdf0c29
DG
449 _FDT((fdt_property_string(fdt, "device_type", "cpu")));
450
451 _FDT((fdt_property_cell(fdt, "cpu-version", env->spr[SPR_PVR])));
0cbad81f 452 _FDT((fdt_property_cell(fdt, "d-cache-block-size",
9fdf0c29 453 env->dcache_line_size)));
0cbad81f
DG
454 _FDT((fdt_property_cell(fdt, "d-cache-line-size",
455 env->dcache_line_size)));
456 _FDT((fdt_property_cell(fdt, "i-cache-block-size",
457 env->icache_line_size)));
458 _FDT((fdt_property_cell(fdt, "i-cache-line-size",
9fdf0c29 459 env->icache_line_size)));
0cbad81f
DG
460
461 if (pcc->l1_dcache_size) {
462 _FDT((fdt_property_cell(fdt, "d-cache-size", pcc->l1_dcache_size)));
463 } else {
464 fprintf(stderr, "Warning: Unknown L1 dcache size for cpu\n");
465 }
466 if (pcc->l1_icache_size) {
467 _FDT((fdt_property_cell(fdt, "i-cache-size", pcc->l1_icache_size)));
468 } else {
469 fprintf(stderr, "Warning: Unknown L1 icache size for cpu\n");
470 }
471
0a8b2938
AG
472 _FDT((fdt_property_cell(fdt, "timebase-frequency", tbfreq)));
473 _FDT((fdt_property_cell(fdt, "clock-frequency", cpufreq)));
9fdf0c29
DG
474 _FDT((fdt_property_cell(fdt, "ibm,slb-size", env->slb_nr)));
475 _FDT((fdt_property_string(fdt, "status", "okay")));
476 _FDT((fdt_property(fdt, "64-bit", NULL, 0)));
e97c3636 477
dcb861cb
AK
478 if (env->spr_cb[SPR_PURR].oea_read) {
479 _FDT((fdt_property(fdt, "ibm,purr", NULL, 0)));
480 }
481
c7a5c0c9 482 if (env->mmu_model & POWERPC_MMU_1TSEG) {
9fdf0c29
DG
483 _FDT((fdt_property(fdt, "ibm,processor-segment-sizes",
484 segs, sizeof(segs))));
485 }
486
6659394f
DG
487 /* Advertise VMX/VSX (vector extensions) if available
488 * 0 / no property == no vector extensions
489 * 1 == VMX / Altivec available
490 * 2 == VSX available */
a7342588
DG
491 if (env->insns_flags & PPC_ALTIVEC) {
492 uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1;
493
6659394f
DG
494 _FDT((fdt_property_cell(fdt, "ibm,vmx", vmx)));
495 }
496
497 /* Advertise DFP (Decimal Floating Point) if available
498 * 0 / no property == no DFP
499 * 1 == DFP available */
a7342588
DG
500 if (env->insns_flags2 & PPC2_DFP) {
501 _FDT((fdt_property_cell(fdt, "ibm,dfp", 1)));
6659394f
DG
502 }
503
5af9873d
BH
504 page_sizes_prop_size = create_page_sizes_prop(env, page_sizes_prop,
505 sizeof(page_sizes_prop));
506 if (page_sizes_prop_size) {
507 _FDT((fdt_property(fdt, "ibm,segment-page-sizes",
508 page_sizes_prop, page_sizes_prop_size)));
509 }
510
10582ff8
AK
511 _FDT((fdt_property_cell(fdt, "ibm,chip-id",
512 cs->cpu_index / cpus_per_socket)));
513
9fdf0c29
DG
514 _FDT((fdt_end_node(fdt)));
515 }
516
9fdf0c29
DG
517 _FDT((fdt_end_node(fdt)));
518
f43e3525
DG
519 /* RTAS */
520 _FDT((fdt_begin_node(fdt, "rtas")));
521
da95324e
AK
522 if (!kvm_enabled() || kvmppc_spapr_use_multitce()) {
523 add_str(hypertas, "hcall-multi-tce");
524 }
a1d59c0f
AK
525 _FDT((fdt_property(fdt, "ibm,hypertas-functions", hypertas->str,
526 hypertas->len)));
527 g_string_free(hypertas, TRUE);
528 _FDT((fdt_property(fdt, "qemu,hypertas-functions", qemu_hypertas->str,
529 qemu_hypertas->len)));
530 g_string_free(qemu_hypertas, TRUE);
f43e3525 531
6e806cc3
BR
532 _FDT((fdt_property(fdt, "ibm,associativity-reference-points",
533 refpoints, sizeof(refpoints))));
534
74d042e5 535 _FDT((fdt_property_cell(fdt, "rtas-error-log-max", RTAS_ERROR_LOG_MAX)));
79853e18
TD
536 _FDT((fdt_property_cell(fdt, "rtas-event-scan-rate",
537 RTAS_EVENT_SCAN_RATE)));
74d042e5 538
2e14072f 539 /*
9d632f5f 540 * According to PAPR, rtas ibm,os-term does not guarantee a return
2e14072f
ND
541 * back to the guest cpu.
542 *
543 * While an additional ibm,extended-os-term property indicates that
544 * rtas call return will always occur. Set this property.
545 */
546 _FDT((fdt_property(fdt, "ibm,extended-os-term", NULL, 0)));
547
f43e3525
DG
548 _FDT((fdt_end_node(fdt)));
549
b5cec4c5 550 /* interrupt controller */
9dfef5aa 551 _FDT((fdt_begin_node(fdt, "interrupt-controller")));
b5cec4c5
DG
552
553 _FDT((fdt_property_string(fdt, "device_type",
554 "PowerPC-External-Interrupt-Presentation")));
555 _FDT((fdt_property_string(fdt, "compatible", "IBM,ppc-xicp")));
b5cec4c5
DG
556 _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));
557 _FDT((fdt_property(fdt, "ibm,interrupt-server-ranges",
558 interrupt_server_ranges_prop,
559 sizeof(interrupt_server_ranges_prop))));
0c103f8e
DG
560 _FDT((fdt_property_cell(fdt, "#interrupt-cells", 2)));
561 _FDT((fdt_property_cell(fdt, "linux,phandle", PHANDLE_XICP)));
562 _FDT((fdt_property_cell(fdt, "phandle", PHANDLE_XICP)));
b5cec4c5
DG
563
564 _FDT((fdt_end_node(fdt)));
565
4040ab72
DG
566 /* vdevice */
567 _FDT((fdt_begin_node(fdt, "vdevice")));
568
569 _FDT((fdt_property_string(fdt, "device_type", "vdevice")));
570 _FDT((fdt_property_string(fdt, "compatible", "IBM,vdevice")));
571 _FDT((fdt_property_cell(fdt, "#address-cells", 0x1)));
572 _FDT((fdt_property_cell(fdt, "#size-cells", 0x0)));
b5cec4c5
DG
573 _FDT((fdt_property_cell(fdt, "#interrupt-cells", 0x2)));
574 _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));
4040ab72
DG
575
576 _FDT((fdt_end_node(fdt)));
577
74d042e5
DG
578 /* event-sources */
579 spapr_events_fdt_skel(fdt, epow_irq);
580
f7d69146
AG
581 /* /hypervisor node */
582 if (kvm_enabled()) {
583 uint8_t hypercall[16];
584
585 /* indicate KVM hypercall interface */
586 _FDT((fdt_begin_node(fdt, "hypervisor")));
587 _FDT((fdt_property_string(fdt, "compatible", "linux,kvm")));
588 if (kvmppc_has_cap_fixup_hcalls()) {
589 /*
590 * Older KVM versions with older guest kernels were broken with the
591 * magic page, don't allow the guest to map it.
592 */
593 kvmppc_get_hypercall(first_cpu->env_ptr, hypercall,
594 sizeof(hypercall));
595 _FDT((fdt_property(fdt, "hcall-instructions", hypercall,
596 sizeof(hypercall))));
597 }
598 _FDT((fdt_end_node(fdt)));
599 }
600
9fdf0c29
DG
601 _FDT((fdt_end_node(fdt))); /* close root node */
602 _FDT((fdt_finish(fdt)));
603
a3467baa
DG
604 return fdt;
605}
606
2a6593cb
AK
607int spapr_h_cas_compose_response(target_ulong addr, target_ulong size)
608{
609 void *fdt, *fdt_skel;
610 sPAPRDeviceTreeUpdateHeader hdr = { .version_id = 1 };
611
612 size -= sizeof(hdr);
613
614 /* Create sceleton */
615 fdt_skel = g_malloc0(size);
616 _FDT((fdt_create(fdt_skel, size)));
617 _FDT((fdt_begin_node(fdt_skel, "")));
618 _FDT((fdt_end_node(fdt_skel)));
619 _FDT((fdt_finish(fdt_skel)));
620 fdt = g_malloc0(size);
621 _FDT((fdt_open_into(fdt_skel, fdt, size)));
622 g_free(fdt_skel);
623
3794d548
AK
624 /* Fix skeleton up */
625 _FDT((spapr_fixup_cpu_dt(fdt, spapr)));
2a6593cb
AK
626
627 /* Pack resulting tree */
628 _FDT((fdt_pack(fdt)));
629
630 if (fdt_totalsize(fdt) + sizeof(hdr) > size) {
631 trace_spapr_cas_failed(size);
632 return -1;
633 }
634
635 cpu_physical_memory_write(addr, &hdr, sizeof(hdr));
636 cpu_physical_memory_write(addr + sizeof(hdr), fdt, fdt_totalsize(fdt));
637 trace_spapr_cas_continue(fdt_totalsize(fdt) + sizeof(hdr));
638 g_free(fdt);
639
640 return 0;
641}
642
26a8c353
AK
643static void spapr_populate_memory_node(void *fdt, int nodeid, hwaddr start,
644 hwaddr size)
645{
646 uint32_t associativity[] = {
647 cpu_to_be32(0x4), /* length */
648 cpu_to_be32(0x0), cpu_to_be32(0x0),
c3b4f589 649 cpu_to_be32(0x0), cpu_to_be32(nodeid)
26a8c353
AK
650 };
651 char mem_name[32];
652 uint64_t mem_reg_property[2];
653 int off;
654
655 mem_reg_property[0] = cpu_to_be64(start);
656 mem_reg_property[1] = cpu_to_be64(size);
657
658 sprintf(mem_name, "memory@" TARGET_FMT_lx, start);
659 off = fdt_add_subnode(fdt, 0, mem_name);
660 _FDT(off);
661 _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
662 _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
663 sizeof(mem_reg_property))));
664 _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
665 sizeof(associativity))));
666}
667
7f763a5d
DG
668static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
669{
7db8a127
AK
670 hwaddr mem_start, node_size;
671 int i, nb_nodes = nb_numa_nodes;
672 NodeInfo *nodes = numa_info;
673 NodeInfo ramnode;
674
675 /* No NUMA nodes, assume there is just one node with whole RAM */
676 if (!nb_numa_nodes) {
677 nb_nodes = 1;
678 ramnode.node_mem = ram_size;
679 nodes = &ramnode;
5fe269b1 680 }
7f763a5d 681
7db8a127
AK
682 for (i = 0, mem_start = 0; i < nb_nodes; ++i) {
683 if (!nodes[i].node_mem) {
684 continue;
685 }
5fe269b1
PM
686 if (mem_start >= ram_size) {
687 node_size = 0;
688 } else {
7db8a127 689 node_size = nodes[i].node_mem;
5fe269b1
PM
690 if (node_size > ram_size - mem_start) {
691 node_size = ram_size - mem_start;
692 }
693 }
7db8a127
AK
694 if (!mem_start) {
695 /* ppc_spapr_init() checks for rma_size <= node0_size already */
696 spapr_populate_memory_node(fdt, i, 0, spapr->rma_size);
697 mem_start += spapr->rma_size;
698 node_size -= spapr->rma_size;
699 }
6010818c
AK
700 for ( ; node_size; ) {
701 hwaddr sizetmp = pow2floor(node_size);
702
703 /* mem_start != 0 here */
704 if (ctzl(mem_start) < ctzl(sizetmp)) {
705 sizetmp = 1ULL << ctzl(mem_start);
706 }
707
708 spapr_populate_memory_node(fdt, i, mem_start, sizetmp);
709 node_size -= sizetmp;
710 mem_start += sizetmp;
711 }
7f763a5d
DG
712 }
713
714 return 0;
715}
716
a3467baa 717static void spapr_finalize_fdt(sPAPREnvironment *spapr,
a8170e5e
AK
718 hwaddr fdt_addr,
719 hwaddr rtas_addr,
720 hwaddr rtas_size)
a3467baa 721{
5b2128d2
AG
722 MachineState *machine = MACHINE(qdev_get_machine());
723 const char *boot_device = machine->boot_order;
71461b0f
AK
724 int ret, i;
725 size_t cb = 0;
726 char *bootlist;
a3467baa 727 void *fdt;
3384f95c 728 sPAPRPHBState *phb;
a3467baa 729
7267c094 730 fdt = g_malloc(FDT_MAX_SIZE);
a3467baa
DG
731
732 /* open out the base tree into a temp buffer for the final tweaks */
733 _FDT((fdt_open_into(spapr->fdt_skel, fdt, FDT_MAX_SIZE)));
4040ab72 734
7f763a5d
DG
735 ret = spapr_populate_memory(spapr, fdt);
736 if (ret < 0) {
737 fprintf(stderr, "couldn't setup memory nodes in fdt\n");
738 exit(1);
739 }
740
4040ab72
DG
741 ret = spapr_populate_vdevice(spapr->vio_bus, fdt);
742 if (ret < 0) {
743 fprintf(stderr, "couldn't setup vio devices in fdt\n");
744 exit(1);
745 }
746
3384f95c 747 QLIST_FOREACH(phb, &spapr->phbs, list) {
e0fdbd7c 748 ret = spapr_populate_pci_dt(phb, PHANDLE_XICP, fdt);
3384f95c
DG
749 }
750
751 if (ret < 0) {
752 fprintf(stderr, "couldn't setup PCI devices in fdt\n");
753 exit(1);
754 }
755
39ac8455
DG
756 /* RTAS */
757 ret = spapr_rtas_device_tree_setup(fdt, rtas_addr, rtas_size);
758 if (ret < 0) {
759 fprintf(stderr, "Couldn't set up RTAS device tree properties\n");
760 }
761
6e806cc3 762 /* Advertise NUMA via ibm,associativity */
7f763a5d
DG
763 ret = spapr_fixup_cpu_dt(fdt, spapr);
764 if (ret < 0) {
765 fprintf(stderr, "Couldn't finalize CPU device tree properties\n");
6e806cc3
BR
766 }
767
71461b0f
AK
768 bootlist = get_boot_devices_list(&cb, true);
769 if (cb && bootlist) {
770 int offset = fdt_path_offset(fdt, "/chosen");
771 if (offset < 0) {
772 exit(1);
773 }
774 for (i = 0; i < cb; i++) {
775 if (bootlist[i] == '\n') {
776 bootlist[i] = ' ';
777 }
778
779 }
780 ret = fdt_setprop_string(fdt, offset, "qemu,boot-list", bootlist);
781 }
782
5b2128d2
AG
783 if (boot_device && strlen(boot_device)) {
784 int offset = fdt_path_offset(fdt, "/chosen");
785
786 if (offset < 0) {
787 exit(1);
788 }
789 fdt_setprop_string(fdt, offset, "qemu,boot-device", boot_device);
790 }
791
3fc5acde 792 if (!spapr->has_graphics) {
f28359d8
LZ
793 spapr_populate_chosen_stdout(fdt, spapr->vio_bus);
794 }
68f3a94c 795
4040ab72
DG
796 _FDT((fdt_pack(fdt)));
797
4d8d5467 798 if (fdt_totalsize(fdt) > FDT_MAX_SIZE) {
730fce59
TH
799 error_report("FDT too big ! 0x%x bytes (max is 0x%x)",
800 fdt_totalsize(fdt), FDT_MAX_SIZE);
4d8d5467
BH
801 exit(1);
802 }
803
a3467baa 804 cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt));
9fdf0c29 805
a21a7a70 806 g_free(bootlist);
7267c094 807 g_free(fdt);
9fdf0c29
DG
808}
809
810static uint64_t translate_kernel_address(void *opaque, uint64_t addr)
811{
812 return (addr & 0x0fffffff) + KERNEL_LOAD_ADDR;
813}
814
1b14670a 815static void emulate_spapr_hypercall(PowerPCCPU *cpu)
9fdf0c29 816{
1b14670a
AF
817 CPUPPCState *env = &cpu->env;
818
efcb9383
DG
819 if (msr_pr) {
820 hcall_dprintf("Hypercall made with MSR[PR]=1\n");
821 env->gpr[3] = H_PRIVILEGE;
822 } else {
aa100fa4 823 env->gpr[3] = spapr_hypercall(cpu, env->gpr[3], &env->gpr[4]);
efcb9383 824 }
9fdf0c29
DG
825}
826
e6b8fd24
SMJ
/*
 * Hash page table entry helpers.  An entry is two consecutive 64-bit
 * words; the valid and dirty flags live in the first word.
 *
 * HPTE()       - address of entry _i in _table (fully parenthesised so it
 *                can be used safely inside larger expressions)
 * HPTE_VALID() - non-zero if HPTE64_V_VALID is set
 * HPTE_DIRTY() - non-zero if HPTE64_V_HPTE_DIRTY is set
 * CLEAN_HPTE() - clear HPTE64_V_HPTE_DIRTY
 * DIRTY_HPTE() - set HPTE64_V_HPTE_DIRTY
 */
#define HPTE(_table, _i)   ((void *)(((uint64_t *)(_table)) + ((_i) * 2)))
#define HPTE_VALID(_hpte)  (tswap64(*((uint64_t *)(_hpte))) & HPTE64_V_VALID)
#define HPTE_DIRTY(_hpte)  (tswap64(*((uint64_t *)(_hpte))) & HPTE64_V_HPTE_DIRTY)
#define CLEAN_HPTE(_hpte)  ((*(uint64_t *)(_hpte)) &= tswap64(~HPTE64_V_HPTE_DIRTY))
#define DIRTY_HPTE(_hpte)  ((*(uint64_t *)(_hpte)) |= tswap64(HPTE64_V_HPTE_DIRTY))
832
7f763a5d
DG
833static void spapr_reset_htab(sPAPREnvironment *spapr)
834{
835 long shift;
e6b8fd24 836 int index;
7f763a5d
DG
837
838 /* allocate hash page table. For now we always make this 16mb,
839 * later we should probably make it scale to the size of guest
840 * RAM */
841
842 shift = kvmppc_reset_htab(spapr->htab_shift);
843
844 if (shift > 0) {
845 /* Kernel handles htab, we don't need to allocate one */
846 spapr->htab_shift = shift;
7c43bca0 847 kvmppc_kern_htab = true;
01a57972
SMJ
848
849 /* Tell readers to update their file descriptor */
850 if (spapr->htab_fd >= 0) {
851 spapr->htab_fd_stale = true;
852 }
7f763a5d
DG
853 } else {
854 if (!spapr->htab) {
855 /* Allocate an htab if we don't yet have one */
856 spapr->htab = qemu_memalign(HTAB_SIZE(spapr), HTAB_SIZE(spapr));
857 }
858
859 /* And clear it */
860 memset(spapr->htab, 0, HTAB_SIZE(spapr));
e6b8fd24
SMJ
861
862 for (index = 0; index < HTAB_SIZE(spapr) / HASH_PTE_SIZE_64; index++) {
863 DIRTY_HPTE(HPTE(spapr->htab, index));
864 }
7f763a5d
DG
865 }
866
867 /* Update the RMA size if necessary */
868 if (spapr->vrma_adjust) {
b082d65a
AK
869 spapr->rma_size = kvmppc_rma_size(spapr_node0_size(),
870 spapr->htab_shift);
7f763a5d 871 }
9fdf0c29
DG
872}
873
9e3f9733
AG
874static int find_unknown_sysbus_device(SysBusDevice *sbdev, void *opaque)
875{
876 bool matched = false;
877
878 if (object_dynamic_cast(OBJECT(sbdev), TYPE_SPAPR_PCI_HOST_BRIDGE)) {
879 matched = true;
880 }
881
882 if (!matched) {
883 error_report("Device %s is not supported by this machine yet.",
884 qdev_fw_name(DEVICE(sbdev)));
885 exit(1);
886 }
887
888 return 0;
889}
890
01a57972
SMJ
891/*
892 * A guest reset will cause spapr->htab_fd to become stale if being used.
893 * Reopen the file descriptor to make sure the whole HTAB is properly read.
894 */
895static int spapr_check_htab_fd(sPAPREnvironment *spapr)
896{
897 int rc = 0;
898
899 if (spapr->htab_fd_stale) {
900 close(spapr->htab_fd);
901 spapr->htab_fd = kvmppc_get_htab_fd(false);
902 if (spapr->htab_fd < 0) {
903 error_report("Unable to open fd for reading hash table from KVM: "
730fce59 904 "%s", strerror(errno));
01a57972
SMJ
905 rc = -1;
906 }
907 spapr->htab_fd_stale = false;
908 }
909
910 return rc;
911}
912
c8787ad4 913static void ppc_spapr_reset(void)
a3467baa 914{
182735ef 915 PowerPCCPU *first_ppc_cpu;
b7d1f77a 916 uint32_t rtas_limit;
259186a7 917
9e3f9733
AG
918 /* Check for unknown sysbus devices */
919 foreach_dynamic_sysbus_device(find_unknown_sysbus_device, NULL);
920
7f763a5d
DG
921 /* Reset the hash table & recalc the RMA */
922 spapr_reset_htab(spapr);
a3467baa 923
c8787ad4 924 qemu_devices_reset();
a3467baa 925
b7d1f77a
BH
926 /*
927 * We place the device tree and RTAS just below either the top of the RMA,
928 * or just below 2GB, whichever is lowere, so that it can be
929 * processed with 32-bit real mode code if necessary
930 */
931 rtas_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR);
932 spapr->rtas_addr = rtas_limit - RTAS_MAX_SIZE;
933 spapr->fdt_addr = spapr->rtas_addr - FDT_MAX_SIZE;
934
a3467baa
DG
935 /* Load the fdt */
936 spapr_finalize_fdt(spapr, spapr->fdt_addr, spapr->rtas_addr,
937 spapr->rtas_size);
938
b7d1f77a
BH
939 /* Copy RTAS over */
940 cpu_physical_memory_write(spapr->rtas_addr, spapr->rtas_blob,
941 spapr->rtas_size);
942
a3467baa 943 /* Set up the entry state */
182735ef
AF
944 first_ppc_cpu = POWERPC_CPU(first_cpu);
945 first_ppc_cpu->env.gpr[3] = spapr->fdt_addr;
946 first_ppc_cpu->env.gpr[5] = 0;
947 first_cpu->halted = 0;
948 first_ppc_cpu->env.nip = spapr->entry_point;
a3467baa
DG
949
950}
951
1bba0dc9
AF
952static void spapr_cpu_reset(void *opaque)
953{
5b2038e0 954 PowerPCCPU *cpu = opaque;
259186a7 955 CPUState *cs = CPU(cpu);
048706d9 956 CPUPPCState *env = &cpu->env;
1bba0dc9 957
259186a7 958 cpu_reset(cs);
048706d9
DG
959
960 /* All CPUs start halted. CPU0 is unhalted from the machine level
961 * reset code and the rest are explicitly started up by the guest
962 * using an RTAS call */
259186a7 963 cs->halted = 1;
048706d9
DG
964
965 env->spr[SPR_HIOR] = 0;
7f763a5d 966
4be21d56 967 env->external_htab = (uint8_t *)spapr->htab;
5736245c
AK
968 if (kvm_enabled() && !env->external_htab) {
969 /*
970 * HV KVM, set external_htab to 1 so our ppc_hash64_load_hpte*
971 * functions do the right thing.
972 */
973 env->external_htab = (void *)1;
974 }
7f763a5d 975 env->htab_base = -1;
f3c75d42
AK
976 /*
977 * htab_mask is the mask used to normalize hash value to PTEG index.
978 * htab_shift is log2 of hash table size.
979 * We have 8 hpte per group, and each hpte is 16 bytes.
980 * ie have 128 bytes per hpte entry.
981 */
982 env->htab_mask = (1ULL << ((spapr)->htab_shift - 7)) - 1;
ec4936e1 983 env->spr[SPR_SDR1] = (target_ulong)(uintptr_t)spapr->htab |
7f763a5d 984 (spapr->htab_shift - 18);
1bba0dc9
AF
985}
986
639e8102
DG
987static void spapr_create_nvram(sPAPREnvironment *spapr)
988{
2ff3de68 989 DeviceState *dev = qdev_create(&spapr->vio_bus->bus, "spapr-nvram");
3978b863 990 DriveInfo *dinfo = drive_get(IF_PFLASH, 0, 0);
639e8102 991
3978b863 992 if (dinfo) {
4be74634 993 qdev_prop_set_drive_nofail(dev, "drive", blk_by_legacy_dinfo(dinfo));
639e8102
DG
994 }
995
996 qdev_init_nofail(dev);
997
998 spapr->nvram = (struct sPAPRNVRAM *)dev;
999}
1000
28df36a1
DG
1001static void spapr_rtc_create(sPAPREnvironment *spapr)
1002{
1003 DeviceState *dev = qdev_create(NULL, TYPE_SPAPR_RTC);
1004
1005 qdev_init_nofail(dev);
1006 spapr->rtc = dev;
74e5ae28
DG
1007
1008 object_property_add_alias(qdev_get_machine(), "rtc-time",
1009 OBJECT(spapr->rtc), "date", NULL);
28df36a1
DG
1010}
1011
8c57b867 1012/* Returns whether we want to use VGA or not */
f28359d8
LZ
1013static int spapr_vga_init(PCIBus *pci_bus)
1014{
8c57b867 1015 switch (vga_interface_type) {
8c57b867 1016 case VGA_NONE:
7effdaa3
MW
1017 return false;
1018 case VGA_DEVICE:
1019 return true;
1ddcae82
AJ
1020 case VGA_STD:
1021 return pci_vga_init(pci_bus) != NULL;
8c57b867 1022 default:
f28359d8
LZ
1023 fprintf(stderr, "This vga model is not supported,"
1024 "currently it only supports -vga std\n");
8c57b867 1025 exit(0);
f28359d8 1026 }
f28359d8
LZ
1027}
1028
880ae7de
DG
1029static int spapr_post_load(void *opaque, int version_id)
1030{
1031 sPAPREnvironment *spapr = (sPAPREnvironment *)opaque;
1032 int err = 0;
1033
631b22ea 1034 /* In earlier versions, there was no separate qdev for the PAPR
880ae7de
DG
1035 * RTC, so the RTC offset was stored directly in sPAPREnvironment.
1036 * So when migrating from those versions, poke the incoming offset
1037 * value into the RTC device */
1038 if (version_id < 3) {
1039 err = spapr_rtc_import_offset(spapr->rtc, spapr->rtc_offset);
1040 }
1041
1042 return err;
1043}
1044
/* vmstate field test: true when the incoming stream predates version 3 */
static bool version_before_3(void *opaque, int version_id)
{
    const int first_new_version = 3;

    return version_id < first_new_version;
}
1049
4be21d56
DG
1050static const VMStateDescription vmstate_spapr = {
1051 .name = "spapr",
880ae7de 1052 .version_id = 3,
4be21d56 1053 .minimum_version_id = 1,
880ae7de 1054 .post_load = spapr_post_load,
3aff6c2f 1055 .fields = (VMStateField[]) {
880ae7de
DG
1056 /* used to be @next_irq */
1057 VMSTATE_UNUSED_BUFFER(version_before_3, 0, 4),
4be21d56
DG
1058
1059 /* RTC offset */
880ae7de
DG
1060 VMSTATE_UINT64_TEST(rtc_offset, sPAPREnvironment, version_before_3),
1061
98a8b524 1062 VMSTATE_PPC_TIMEBASE_V(tb, sPAPREnvironment, 2),
4be21d56
DG
1063 VMSTATE_END_OF_LIST()
1064 },
1065};
1066
4be21d56
DG
1067static int htab_save_setup(QEMUFile *f, void *opaque)
1068{
1069 sPAPREnvironment *spapr = opaque;
1070
4be21d56
DG
1071 /* "Iteration" header */
1072 qemu_put_be32(f, spapr->htab_shift);
1073
e68cb8b4
AK
1074 if (spapr->htab) {
1075 spapr->htab_save_index = 0;
1076 spapr->htab_first_pass = true;
1077 } else {
1078 assert(kvm_enabled());
1079
1080 spapr->htab_fd = kvmppc_get_htab_fd(false);
01a57972 1081 spapr->htab_fd_stale = false;
e68cb8b4
AK
1082 if (spapr->htab_fd < 0) {
1083 fprintf(stderr, "Unable to open fd for reading hash table from KVM: %s\n",
1084 strerror(errno));
1085 return -1;
1086 }
1087 }
1088
1089
4be21d56
DG
1090 return 0;
1091}
1092
4be21d56
DG
1093static void htab_save_first_pass(QEMUFile *f, sPAPREnvironment *spapr,
1094 int64_t max_ns)
1095{
1096 int htabslots = HTAB_SIZE(spapr) / HASH_PTE_SIZE_64;
1097 int index = spapr->htab_save_index;
bc72ad67 1098 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
4be21d56
DG
1099
1100 assert(spapr->htab_first_pass);
1101
1102 do {
1103 int chunkstart;
1104
1105 /* Consume invalid HPTEs */
1106 while ((index < htabslots)
1107 && !HPTE_VALID(HPTE(spapr->htab, index))) {
1108 index++;
1109 CLEAN_HPTE(HPTE(spapr->htab, index));
1110 }
1111
1112 /* Consume valid HPTEs */
1113 chunkstart = index;
338c25b6 1114 while ((index < htabslots) && (index - chunkstart < USHRT_MAX)
4be21d56
DG
1115 && HPTE_VALID(HPTE(spapr->htab, index))) {
1116 index++;
1117 CLEAN_HPTE(HPTE(spapr->htab, index));
1118 }
1119
1120 if (index > chunkstart) {
1121 int n_valid = index - chunkstart;
1122
1123 qemu_put_be32(f, chunkstart);
1124 qemu_put_be16(f, n_valid);
1125 qemu_put_be16(f, 0);
1126 qemu_put_buffer(f, HPTE(spapr->htab, chunkstart),
1127 HASH_PTE_SIZE_64 * n_valid);
1128
bc72ad67 1129 if ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) > max_ns) {
4be21d56
DG
1130 break;
1131 }
1132 }
1133 } while ((index < htabslots) && !qemu_file_rate_limit(f));
1134
1135 if (index >= htabslots) {
1136 assert(index == htabslots);
1137 index = 0;
1138 spapr->htab_first_pass = false;
1139 }
1140 spapr->htab_save_index = index;
1141}
1142
e68cb8b4
AK
1143static int htab_save_later_pass(QEMUFile *f, sPAPREnvironment *spapr,
1144 int64_t max_ns)
4be21d56
DG
1145{
1146 bool final = max_ns < 0;
1147 int htabslots = HTAB_SIZE(spapr) / HASH_PTE_SIZE_64;
1148 int examined = 0, sent = 0;
1149 int index = spapr->htab_save_index;
bc72ad67 1150 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
4be21d56
DG
1151
1152 assert(!spapr->htab_first_pass);
1153
1154 do {
1155 int chunkstart, invalidstart;
1156
1157 /* Consume non-dirty HPTEs */
1158 while ((index < htabslots)
1159 && !HPTE_DIRTY(HPTE(spapr->htab, index))) {
1160 index++;
1161 examined++;
1162 }
1163
1164 chunkstart = index;
1165 /* Consume valid dirty HPTEs */
338c25b6 1166 while ((index < htabslots) && (index - chunkstart < USHRT_MAX)
4be21d56
DG
1167 && HPTE_DIRTY(HPTE(spapr->htab, index))
1168 && HPTE_VALID(HPTE(spapr->htab, index))) {
1169 CLEAN_HPTE(HPTE(spapr->htab, index));
1170 index++;
1171 examined++;
1172 }
1173
1174 invalidstart = index;
1175 /* Consume invalid dirty HPTEs */
338c25b6 1176 while ((index < htabslots) && (index - invalidstart < USHRT_MAX)
4be21d56
DG
1177 && HPTE_DIRTY(HPTE(spapr->htab, index))
1178 && !HPTE_VALID(HPTE(spapr->htab, index))) {
1179 CLEAN_HPTE(HPTE(spapr->htab, index));
1180 index++;
1181 examined++;
1182 }
1183
1184 if (index > chunkstart) {
1185 int n_valid = invalidstart - chunkstart;
1186 int n_invalid = index - invalidstart;
1187
1188 qemu_put_be32(f, chunkstart);
1189 qemu_put_be16(f, n_valid);
1190 qemu_put_be16(f, n_invalid);
1191 qemu_put_buffer(f, HPTE(spapr->htab, chunkstart),
1192 HASH_PTE_SIZE_64 * n_valid);
1193 sent += index - chunkstart;
1194
bc72ad67 1195 if (!final && (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) > max_ns) {
4be21d56
DG
1196 break;
1197 }
1198 }
1199
1200 if (examined >= htabslots) {
1201 break;
1202 }
1203
1204 if (index >= htabslots) {
1205 assert(index == htabslots);
1206 index = 0;
1207 }
1208 } while ((examined < htabslots) && (!qemu_file_rate_limit(f) || final));
1209
1210 if (index >= htabslots) {
1211 assert(index == htabslots);
1212 index = 0;
1213 }
1214
1215 spapr->htab_save_index = index;
1216
e68cb8b4 1217 return (examined >= htabslots) && (sent == 0) ? 1 : 0;
4be21d56
DG
1218}
1219
e68cb8b4
AK
1220#define MAX_ITERATION_NS 5000000 /* 5 ms */
1221#define MAX_KVM_BUF_SIZE 2048
1222
4be21d56
DG
1223static int htab_save_iterate(QEMUFile *f, void *opaque)
1224{
1225 sPAPREnvironment *spapr = opaque;
e68cb8b4 1226 int rc = 0;
4be21d56
DG
1227
1228 /* Iteration header */
1229 qemu_put_be32(f, 0);
1230
e68cb8b4
AK
1231 if (!spapr->htab) {
1232 assert(kvm_enabled());
1233
01a57972
SMJ
1234 rc = spapr_check_htab_fd(spapr);
1235 if (rc < 0) {
1236 return rc;
1237 }
1238
e68cb8b4
AK
1239 rc = kvmppc_save_htab(f, spapr->htab_fd,
1240 MAX_KVM_BUF_SIZE, MAX_ITERATION_NS);
1241 if (rc < 0) {
1242 return rc;
1243 }
1244 } else if (spapr->htab_first_pass) {
4be21d56
DG
1245 htab_save_first_pass(f, spapr, MAX_ITERATION_NS);
1246 } else {
e68cb8b4 1247 rc = htab_save_later_pass(f, spapr, MAX_ITERATION_NS);
4be21d56
DG
1248 }
1249
1250 /* End marker */
1251 qemu_put_be32(f, 0);
1252 qemu_put_be16(f, 0);
1253 qemu_put_be16(f, 0);
1254
e68cb8b4 1255 return rc;
4be21d56
DG
1256}
1257
1258static int htab_save_complete(QEMUFile *f, void *opaque)
1259{
1260 sPAPREnvironment *spapr = opaque;
1261
1262 /* Iteration header */
1263 qemu_put_be32(f, 0);
1264
e68cb8b4
AK
1265 if (!spapr->htab) {
1266 int rc;
1267
1268 assert(kvm_enabled());
1269
01a57972
SMJ
1270 rc = spapr_check_htab_fd(spapr);
1271 if (rc < 0) {
1272 return rc;
1273 }
1274
e68cb8b4
AK
1275 rc = kvmppc_save_htab(f, spapr->htab_fd, MAX_KVM_BUF_SIZE, -1);
1276 if (rc < 0) {
1277 return rc;
1278 }
1279 close(spapr->htab_fd);
1280 spapr->htab_fd = -1;
1281 } else {
1282 htab_save_later_pass(f, spapr, -1);
1283 }
4be21d56
DG
1284
1285 /* End marker */
1286 qemu_put_be32(f, 0);
1287 qemu_put_be16(f, 0);
1288 qemu_put_be16(f, 0);
1289
1290 return 0;
1291}
1292
1293static int htab_load(QEMUFile *f, void *opaque, int version_id)
1294{
1295 sPAPREnvironment *spapr = opaque;
1296 uint32_t section_hdr;
e68cb8b4 1297 int fd = -1;
4be21d56
DG
1298
1299 if (version_id < 1 || version_id > 1) {
1300 fprintf(stderr, "htab_load() bad version\n");
1301 return -EINVAL;
1302 }
1303
1304 section_hdr = qemu_get_be32(f);
1305
1306 if (section_hdr) {
1307 /* First section, just the hash shift */
1308 if (spapr->htab_shift != section_hdr) {
1309 return -EINVAL;
1310 }
1311 return 0;
1312 }
1313
e68cb8b4
AK
1314 if (!spapr->htab) {
1315 assert(kvm_enabled());
1316
1317 fd = kvmppc_get_htab_fd(true);
1318 if (fd < 0) {
1319 fprintf(stderr, "Unable to open fd to restore KVM hash table: %s\n",
1320 strerror(errno));
1321 }
1322 }
1323
4be21d56
DG
1324 while (true) {
1325 uint32_t index;
1326 uint16_t n_valid, n_invalid;
1327
1328 index = qemu_get_be32(f);
1329 n_valid = qemu_get_be16(f);
1330 n_invalid = qemu_get_be16(f);
1331
1332 if ((index == 0) && (n_valid == 0) && (n_invalid == 0)) {
1333 /* End of Stream */
1334 break;
1335 }
1336
e68cb8b4 1337 if ((index + n_valid + n_invalid) >
4be21d56
DG
1338 (HTAB_SIZE(spapr) / HASH_PTE_SIZE_64)) {
1339 /* Bad index in stream */
1340 fprintf(stderr, "htab_load() bad index %d (%hd+%hd entries) "
e68cb8b4
AK
1341 "in htab stream (htab_shift=%d)\n", index, n_valid, n_invalid,
1342 spapr->htab_shift);
4be21d56
DG
1343 return -EINVAL;
1344 }
1345
e68cb8b4
AK
1346 if (spapr->htab) {
1347 if (n_valid) {
1348 qemu_get_buffer(f, HPTE(spapr->htab, index),
1349 HASH_PTE_SIZE_64 * n_valid);
1350 }
1351 if (n_invalid) {
1352 memset(HPTE(spapr->htab, index + n_valid), 0,
1353 HASH_PTE_SIZE_64 * n_invalid);
1354 }
1355 } else {
1356 int rc;
1357
1358 assert(fd >= 0);
1359
1360 rc = kvmppc_load_htab_chunk(f, fd, index, n_valid, n_invalid);
1361 if (rc < 0) {
1362 return rc;
1363 }
4be21d56
DG
1364 }
1365 }
1366
e68cb8b4
AK
1367 if (!spapr->htab) {
1368 assert(fd >= 0);
1369 close(fd);
1370 }
1371
4be21d56
DG
1372 return 0;
1373}
1374
1375static SaveVMHandlers savevm_htab_handlers = {
1376 .save_live_setup = htab_save_setup,
1377 .save_live_iterate = htab_save_iterate,
1378 .save_live_complete = htab_save_complete,
1379 .load_state = htab_load,
1380};
1381
5b2128d2
AG
1382static void spapr_boot_set(void *opaque, const char *boot_device,
1383 Error **errp)
1384{
1385 MachineState *machine = MACHINE(qdev_get_machine());
1386 machine->boot_order = g_strdup(boot_device);
1387}
1388
9fdf0c29 1389/* pSeries LPAR / sPAPR hardware init */
3ef96221 1390static void ppc_spapr_init(MachineState *machine)
9fdf0c29 1391{
3ef96221
MA
1392 ram_addr_t ram_size = machine->ram_size;
1393 const char *cpu_model = machine->cpu_model;
1394 const char *kernel_filename = machine->kernel_filename;
1395 const char *kernel_cmdline = machine->kernel_cmdline;
1396 const char *initrd_filename = machine->initrd_filename;
05769733 1397 PowerPCCPU *cpu;
e2684c0b 1398 CPUPPCState *env;
8c9f64df 1399 PCIHostState *phb;
9fdf0c29 1400 int i;
890c2b77
AK
1401 MemoryRegion *sysmem = get_system_memory();
1402 MemoryRegion *ram = g_new(MemoryRegion, 1);
658fa66b
AK
1403 MemoryRegion *rma_region;
1404 void *rma = NULL;
a8170e5e 1405 hwaddr rma_alloc_size;
b082d65a 1406 hwaddr node0_size = spapr_node0_size();
4d8d5467
BH
1407 uint32_t initrd_base = 0;
1408 long kernel_size = 0, initrd_size = 0;
b7d1f77a 1409 long load_limit, fw_size;
16457e7f 1410 bool kernel_le = false;
39ac8455 1411 char *filename;
9fdf0c29 1412
0ee2c058
AK
1413 msi_supported = true;
1414
d43b45e2
DG
1415 spapr = g_malloc0(sizeof(*spapr));
1416 QLIST_INIT(&spapr->phbs);
1417
9fdf0c29
DG
1418 cpu_ppc_hypercall = emulate_spapr_hypercall;
1419
354ac20a 1420 /* Allocate RMA if necessary */
658fa66b 1421 rma_alloc_size = kvmppc_alloc_rma(&rma);
354ac20a
DG
1422
1423 if (rma_alloc_size == -1) {
730fce59 1424 error_report("Unable to create RMA");
354ac20a
DG
1425 exit(1);
1426 }
7f763a5d 1427
c4177479 1428 if (rma_alloc_size && (rma_alloc_size < node0_size)) {
7f763a5d 1429 spapr->rma_size = rma_alloc_size;
354ac20a 1430 } else {
c4177479 1431 spapr->rma_size = node0_size;
7f763a5d
DG
1432
1433 /* With KVM, we don't actually know whether KVM supports an
1434 * unbounded RMA (PR KVM) or is limited by the hash table size
1435 * (HV KVM using VRMA), so we always assume the latter
1436 *
1437 * In that case, we also limit the initial allocations for RTAS
1438 * etc... to 256M since we have no way to know what the VRMA size
1439 * is going to be as it depends on the size of the hash table
1440 * isn't determined yet.
1441 */
1442 if (kvm_enabled()) {
1443 spapr->vrma_adjust = 1;
1444 spapr->rma_size = MIN(spapr->rma_size, 0x10000000);
1445 }
354ac20a
DG
1446 }
1447
c4177479
AK
1448 if (spapr->rma_size > node0_size) {
1449 fprintf(stderr, "Error: Numa node 0 has to span the RMA (%#08"HWADDR_PRIx")\n",
1450 spapr->rma_size);
1451 exit(1);
1452 }
1453
b7d1f77a
BH
1454 /* Setup a load limit for the ramdisk leaving room for SLOF and FDT */
1455 load_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR) - FW_OVERHEAD;
9fdf0c29 1456
382be75d
DG
1457 /* We aim for a hash table of size 1/128 the size of RAM. The
1458 * normal rule of thumb is 1/64 the size of RAM, but that's much
1459 * more than needed for the Linux guests we support. */
1460 spapr->htab_shift = 18; /* Minimum architected size */
1461 while (spapr->htab_shift <= 46) {
1462 if ((1ULL << (spapr->htab_shift + 7)) >= ram_size) {
1463 break;
1464 }
1465 spapr->htab_shift++;
1466 }
7f763a5d 1467
7b565160 1468 /* Set up Interrupt Controller before we create the VCPUs */
446f16a6
MA
1469 spapr->icp = xics_system_init(machine,
1470 smp_cpus * kvmppc_smt_threads() / smp_threads,
7b565160 1471 XICS_IRQS);
7b565160 1472
9fdf0c29
DG
1473 /* init CPUs */
1474 if (cpu_model == NULL) {
6b7a2cf6 1475 cpu_model = kvm_enabled() ? "host" : "POWER7";
9fdf0c29
DG
1476 }
1477 for (i = 0; i < smp_cpus; i++) {
05769733
AF
1478 cpu = cpu_ppc_init(cpu_model);
1479 if (cpu == NULL) {
9fdf0c29
DG
1480 fprintf(stderr, "Unable to find PowerPC CPU definition\n");
1481 exit(1);
1482 }
05769733
AF
1483 env = &cpu->env;
1484
9fdf0c29
DG
1485 /* Set time-base frequency to 512 MHz */
1486 cpu_ppc_tb_init(env, TIMEBASE_FREQ);
9fdf0c29 1487
2cf3eb6d
FC
1488 /* PAPR always has exception vectors in RAM not ROM. To ensure this,
1489 * MSR[IP] should never be set.
1490 */
1491 env->msr_mask &= ~(1 << 6);
048706d9
DG
1492
1493 /* Tell KVM that we're in PAPR mode */
1494 if (kvm_enabled()) {
1bc22652 1495 kvmppc_set_papr(cpu);
048706d9
DG
1496 }
1497
6d9412ea
AK
1498 if (cpu->max_compat) {
1499 if (ppc_set_compat(cpu, cpu->max_compat) < 0) {
1500 exit(1);
1501 }
1502 }
1503
24408a7d
AK
1504 xics_cpu_setup(spapr->icp, cpu);
1505
048706d9 1506 qemu_register_reset(spapr_cpu_reset, cpu);
9fdf0c29
DG
1507 }
1508
1509 /* allocate RAM */
f73a2575 1510 spapr->ram_limit = ram_size;
f92f5da1
AK
1511 memory_region_allocate_system_memory(ram, NULL, "ppc_spapr.ram",
1512 spapr->ram_limit);
1513 memory_region_add_subregion(sysmem, 0, ram);
9fdf0c29 1514
658fa66b
AK
1515 if (rma_alloc_size && rma) {
1516 rma_region = g_new(MemoryRegion, 1);
1517 memory_region_init_ram_ptr(rma_region, NULL, "ppc_spapr.rma",
1518 rma_alloc_size, rma);
1519 vmstate_register_ram_global(rma_region);
1520 memory_region_add_subregion(sysmem, 0, rma_region);
1521 }
1522
39ac8455 1523 filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, "spapr-rtas.bin");
4c56440d 1524 if (!filename) {
730fce59 1525 error_report("Could not find LPAR rtas '%s'", "spapr-rtas.bin");
4c56440d
SW
1526 exit(1);
1527 }
b7d1f77a
BH
1528 spapr->rtas_size = get_image_size(filename);
1529 spapr->rtas_blob = g_malloc(spapr->rtas_size);
1530 if (load_image_size(filename, spapr->rtas_blob, spapr->rtas_size) < 0) {
730fce59 1531 error_report("Could not load LPAR rtas '%s'", filename);
39ac8455
DG
1532 exit(1);
1533 }
4d8d5467 1534 if (spapr->rtas_size > RTAS_MAX_SIZE) {
730fce59
TH
1535 error_report("RTAS too big ! 0x%zx bytes (max is 0x%x)",
1536 (size_t)spapr->rtas_size, RTAS_MAX_SIZE);
4d8d5467
BH
1537 exit(1);
1538 }
7267c094 1539 g_free(filename);
39ac8455 1540
74d042e5
DG
1541 /* Set up EPOW events infrastructure */
1542 spapr_events_init(spapr);
1543
12f42174 1544 /* Set up the RTC RTAS interfaces */
28df36a1 1545 spapr_rtc_create(spapr);
12f42174 1546
b5cec4c5 1547 /* Set up VIO bus */
4040ab72
DG
1548 spapr->vio_bus = spapr_vio_bus_init();
1549
277f9acf 1550 for (i = 0; i < MAX_SERIAL_PORTS; i++) {
4040ab72 1551 if (serial_hds[i]) {
d601fac4 1552 spapr_vty_create(spapr->vio_bus, serial_hds[i]);
4040ab72
DG
1553 }
1554 }
9fdf0c29 1555
639e8102
DG
1556 /* We always have at least the nvram device on VIO */
1557 spapr_create_nvram(spapr);
1558
3384f95c 1559 /* Set up PCI */
fa28f71b
AK
1560 spapr_pci_rtas_init();
1561
89dfd6e1 1562 phb = spapr_create_phb(spapr, 0);
3384f95c 1563
277f9acf 1564 for (i = 0; i < nb_nics; i++) {
8d90ad90
DG
1565 NICInfo *nd = &nd_table[i];
1566
1567 if (!nd->model) {
7267c094 1568 nd->model = g_strdup("ibmveth");
8d90ad90
DG
1569 }
1570
1571 if (strcmp(nd->model, "ibmveth") == 0) {
d601fac4 1572 spapr_vlan_create(spapr->vio_bus, nd);
8d90ad90 1573 } else {
29b358f9 1574 pci_nic_init_nofail(&nd_table[i], phb->bus, nd->model, NULL);
8d90ad90
DG
1575 }
1576 }
1577
6e270446 1578 for (i = 0; i <= drive_get_max_bus(IF_SCSI); i++) {
d601fac4 1579 spapr_vscsi_create(spapr->vio_bus);
6e270446
BH
1580 }
1581
f28359d8 1582 /* Graphics */
8c9f64df 1583 if (spapr_vga_init(phb->bus)) {
3fc5acde 1584 spapr->has_graphics = true;
c6e76503 1585 machine->usb |= defaults_enabled() && !machine->usb_disabled;
f28359d8
LZ
1586 }
1587
4ee9ced9 1588 if (machine->usb) {
8c9f64df 1589 pci_create_simple(phb->bus, -1, "pci-ohci");
c86580b8 1590
35139a59 1591 if (spapr->has_graphics) {
c86580b8
MA
1592 USBBus *usb_bus = usb_bus_find(-1);
1593
1594 usb_create_simple(usb_bus, "usb-kbd");
1595 usb_create_simple(usb_bus, "usb-mouse");
35139a59
DG
1596 }
1597 }
1598
7f763a5d 1599 if (spapr->rma_size < (MIN_RMA_SLOF << 20)) {
4d8d5467
BH
1600 fprintf(stderr, "qemu: pSeries SLOF firmware requires >= "
1601 "%ldM guest RMA (Real Mode Area memory)\n", MIN_RMA_SLOF);
1602 exit(1);
1603 }
1604
9fdf0c29
DG
1605 if (kernel_filename) {
1606 uint64_t lowaddr = 0;
1607
9fdf0c29
DG
1608 kernel_size = load_elf(kernel_filename, translate_kernel_address, NULL,
1609 NULL, &lowaddr, NULL, 1, ELF_MACHINE, 0);
3b66da82 1610 if (kernel_size == ELF_LOAD_WRONG_ENDIAN) {
16457e7f
BH
1611 kernel_size = load_elf(kernel_filename,
1612 translate_kernel_address, NULL,
1613 NULL, &lowaddr, NULL, 0, ELF_MACHINE, 0);
1614 kernel_le = kernel_size > 0;
1615 }
9fdf0c29 1616 if (kernel_size < 0) {
3b66da82
AK
1617 fprintf(stderr, "qemu: error loading %s: %s\n",
1618 kernel_filename, load_elf_strerror(kernel_size));
9fdf0c29
DG
1619 exit(1);
1620 }
1621
1622 /* load initrd */
1623 if (initrd_filename) {
4d8d5467
BH
1624 /* Try to locate the initrd in the gap between the kernel
1625 * and the firmware. Add a bit of space just in case
1626 */
1627 initrd_base = (KERNEL_LOAD_ADDR + kernel_size + 0x1ffff) & ~0xffff;
9fdf0c29 1628 initrd_size = load_image_targphys(initrd_filename, initrd_base,
4d8d5467 1629 load_limit - initrd_base);
9fdf0c29
DG
1630 if (initrd_size < 0) {
1631 fprintf(stderr, "qemu: could not load initial ram disk '%s'\n",
1632 initrd_filename);
1633 exit(1);
1634 }
1635 } else {
1636 initrd_base = 0;
1637 initrd_size = 0;
1638 }
4d8d5467 1639 }
a3467baa 1640
8e7ea787
AF
1641 if (bios_name == NULL) {
1642 bios_name = FW_FILE_NAME;
1643 }
1644 filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
4c56440d 1645 if (!filename) {
68fea5a0 1646 error_report("Could not find LPAR firmware '%s'", bios_name);
4c56440d
SW
1647 exit(1);
1648 }
4d8d5467 1649 fw_size = load_image_targphys(filename, 0, FW_MAX_SIZE);
68fea5a0
TH
1650 if (fw_size <= 0) {
1651 error_report("Could not load LPAR firmware '%s'", filename);
4d8d5467
BH
1652 exit(1);
1653 }
1654 g_free(filename);
4d8d5467
BH
1655
1656 spapr->entry_point = 0x100;
1657
4be21d56
DG
1658 vmstate_register(NULL, 0, &vmstate_spapr, spapr);
1659 register_savevm_live(NULL, "spapr/htab", -1, 1,
1660 &savevm_htab_handlers, spapr);
1661
9fdf0c29 1662 /* Prepare the device tree */
3bbf37f2 1663 spapr->fdt_skel = spapr_create_fdt_skel(initrd_base, initrd_size,
16457e7f 1664 kernel_size, kernel_le,
31fe14d1
NF
1665 kernel_cmdline,
1666 spapr->check_exception_irq);
a3467baa 1667 assert(spapr->fdt_skel != NULL);
5b2128d2 1668
46503c2b
MR
1669 /* used by RTAS */
1670 QTAILQ_INIT(&spapr->ccs_list);
1671 qemu_register_reset(spapr_ccs_reset_hook, spapr);
1672
5b2128d2 1673 qemu_register_boot_set(spapr_boot_set, spapr);
9fdf0c29
DG
1674}
1675
135a129a
AK
/*
 * Translate the "kvm-type" machine option into the numeric type handed
 * to KVM: 0 when unset, 1 for "HV", 2 for "PR".  Any other string is
 * reported and terminates the process with exit status 1.
 */
static int spapr_kvm_type(const char *vm_type)
{
    if (vm_type == NULL) {
        return 0;
    } else if (strcmp(vm_type, "HV") == 0) {
        return 1;
    } else if (strcmp(vm_type, "PR") == 0) {
        return 2;
    }

    error_report("Unknown kvm-type specified '%s'", vm_type);
    exit(1);
}
1693
71461b0f 1694/*
627b84f4 1695 * Implementation of an interface to adjust firmware path
71461b0f
AK
1696 * for the bootindex property handling.
1697 */
1698static char *spapr_get_fw_dev_path(FWPathProvider *p, BusState *bus,
1699 DeviceState *dev)
1700{
1701#define CAST(type, obj, name) \
1702 ((type *)object_dynamic_cast(OBJECT(obj), (name)))
1703 SCSIDevice *d = CAST(SCSIDevice, dev, TYPE_SCSI_DEVICE);
1704 sPAPRPHBState *phb = CAST(sPAPRPHBState, dev, TYPE_SPAPR_PCI_HOST_BRIDGE);
1705
1706 if (d) {
1707 void *spapr = CAST(void, bus->parent, "spapr-vscsi");
1708 VirtIOSCSI *virtio = CAST(VirtIOSCSI, bus->parent, TYPE_VIRTIO_SCSI);
1709 USBDevice *usb = CAST(USBDevice, bus->parent, TYPE_USB_DEVICE);
1710
1711 if (spapr) {
1712 /*
1713 * Replace "channel@0/disk@0,0" with "disk@8000000000000000":
1714 * We use SRP luns of the form 8000 | (bus << 8) | (id << 5) | lun
1715 * in the top 16 bits of the 64-bit LUN
1716 */
1717 unsigned id = 0x8000 | (d->id << 8) | d->lun;
1718 return g_strdup_printf("%s@%"PRIX64, qdev_fw_name(dev),
1719 (uint64_t)id << 48);
1720 } else if (virtio) {
1721 /*
1722 * We use SRP luns of the form 01000000 | (target << 8) | lun
1723 * in the top 32 bits of the 64-bit LUN
1724 * Note: the quote above is from SLOF and it is wrong,
1725 * the actual binding is:
1726 * swap 0100 or 10 << or 20 << ( target lun-id -- srplun )
1727 */
1728 unsigned id = 0x1000000 | (d->id << 16) | d->lun;
1729 return g_strdup_printf("%s@%"PRIX64, qdev_fw_name(dev),
1730 (uint64_t)id << 32);
1731 } else if (usb) {
1732 /*
1733 * We use SRP luns of the form 01000000 | (usb-port << 16) | lun
1734 * in the top 32 bits of the 64-bit LUN
1735 */
1736 unsigned usb_port = atoi(usb->port->path);
1737 unsigned id = 0x1000000 | (usb_port << 16) | d->lun;
1738 return g_strdup_printf("%s@%"PRIX64, qdev_fw_name(dev),
1739 (uint64_t)id << 32);
1740 }
1741 }
1742
1743 if (phb) {
1744 /* Replace "pci" with "pci@800000020000000" */
1745 return g_strdup_printf("pci@%"PRIX64, phb->buid);
1746 }
1747
1748 return NULL;
1749}
1750
23825581
EH
1751static char *spapr_get_kvm_type(Object *obj, Error **errp)
1752{
6ca1502e 1753 sPAPRMachineState *sm = SPAPR_MACHINE(obj);
23825581
EH
1754
1755 return g_strdup(sm->kvm_type);
1756}
1757
1758static void spapr_set_kvm_type(Object *obj, const char *value, Error **errp)
1759{
6ca1502e 1760 sPAPRMachineState *sm = SPAPR_MACHINE(obj);
23825581
EH
1761
1762 g_free(sm->kvm_type);
1763 sm->kvm_type = g_strdup(value);
1764}
1765
1766static void spapr_machine_initfn(Object *obj)
1767{
1768 object_property_add_str(obj, "kvm-type",
1769 spapr_get_kvm_type, spapr_set_kvm_type, NULL);
49d2e648
MA
1770 object_property_set_description(obj, "kvm-type",
1771 "Specifies the KVM virtualization mode (HV, PR)",
1772 NULL);
23825581
EH
1773}
1774
34316482
AK
1775static void ppc_cpu_do_nmi_on_cpu(void *arg)
1776{
1777 CPUState *cs = arg;
1778
1779 cpu_synchronize_state(cs);
1780 ppc_cpu_do_system_reset(cs);
1781}
1782
1783static void spapr_nmi(NMIState *n, int cpu_index, Error **errp)
1784{
1785 CPUState *cs;
1786
1787 CPU_FOREACH(cs) {
1788 async_run_on_cpu(cs, ppc_cpu_do_nmi_on_cpu, cs);
1789 }
1790}
1791
29ee3247
AK
1792static void spapr_machine_class_init(ObjectClass *oc, void *data)
1793{
1794 MachineClass *mc = MACHINE_CLASS(oc);
71461b0f 1795 FWPathProviderClass *fwc = FW_PATH_PROVIDER_CLASS(oc);
34316482 1796 NMIClass *nc = NMI_CLASS(oc);
958db90c 1797
958db90c
MA
1798 mc->init = ppc_spapr_init;
1799 mc->reset = ppc_spapr_reset;
1800 mc->block_default_type = IF_SCSI;
1801 mc->max_cpus = MAX_CPUS;
1802 mc->no_parallel = 1;
5b2128d2 1803 mc->default_boot_order = "";
958db90c 1804 mc->kvm_type = spapr_kvm_type;
9e3f9733 1805 mc->has_dynamic_sysbus = true;
00b4fbe2 1806
71461b0f 1807 fwc->get_dev_path = spapr_get_fw_dev_path;
34316482 1808 nc->nmi_monitor_handler = spapr_nmi;
29ee3247
AK
1809}
1810
1811static const TypeInfo spapr_machine_info = {
1812 .name = TYPE_SPAPR_MACHINE,
1813 .parent = TYPE_MACHINE,
4aee7362 1814 .abstract = true,
6ca1502e 1815 .instance_size = sizeof(sPAPRMachineState),
23825581 1816 .instance_init = spapr_machine_initfn,
29ee3247 1817 .class_init = spapr_machine_class_init,
71461b0f
AK
1818 .interfaces = (InterfaceInfo[]) {
1819 { TYPE_FW_PATH_PROVIDER },
34316482 1820 { TYPE_NMI },
71461b0f
AK
1821 { }
1822 },
29ee3247
AK
1823};
1824
/*
 * Compat property lists.  Each list expands to GlobalProperty
 * initializers (with trailing commas) and includes the list of the next
 * newer version, so an older machine type inherits every newer override.
 */

/* pseries-2.3 */
#define SPAPR_COMPAT_2_3 \
        HW_COMPAT_2_3 \
        {\
            .driver   = "spapr-pci-host-bridge",\
            .property = "dynamic-reconfiguration",\
            .value    = "off",\
        },

/* pseries-2.2 */
#define SPAPR_COMPAT_2_2 \
        SPAPR_COMPAT_2_3 \
        HW_COMPAT_2_2 \
        {\
            .driver   = TYPE_SPAPR_PCI_HOST_BRIDGE,\
            .property = "mem_win_size",\
            .value    = "0x20000000",\
        },

/* pseries-2.1 */
#define SPAPR_COMPAT_2_1 \
        SPAPR_COMPAT_2_2 \
        HW_COMPAT_2_1
b194df47 1845
d25228e7
JW
1846static void spapr_compat_2_3(Object *obj)
1847{
1848}
1849
b0e966d0
JW
1850static void spapr_compat_2_2(Object *obj)
1851{
d25228e7 1852 spapr_compat_2_3(obj);
b0e966d0
JW
1853}
1854
1855static void spapr_compat_2_1(Object *obj)
1856{
1857 spapr_compat_2_2(obj);
1858}
1859
d25228e7
JW
1860static void spapr_machine_2_3_instance_init(Object *obj)
1861{
1862 spapr_compat_2_3(obj);
1863 spapr_machine_initfn(obj);
1864}
1865
b0e966d0
JW
1866static void spapr_machine_2_2_instance_init(Object *obj)
1867{
1868 spapr_compat_2_2(obj);
1869 spapr_machine_initfn(obj);
1870}
1871
1872static void spapr_machine_2_1_instance_init(Object *obj)
1873{
1874 spapr_compat_2_1(obj);
1875 spapr_machine_initfn(obj);
1876}
1877
6026db45
AK
1878static void spapr_machine_2_1_class_init(ObjectClass *oc, void *data)
1879{
1880 MachineClass *mc = MACHINE_CLASS(oc);
68a27b20 1881 static GlobalProperty compat_props[] = {
dd754baf 1882 SPAPR_COMPAT_2_1
68a27b20
MT
1883 { /* end of list */ }
1884 };
6026db45
AK
1885
1886 mc->name = "pseries-2.1";
1887 mc->desc = "pSeries Logical Partition (PAPR compliant) v2.1";
68a27b20 1888 mc->compat_props = compat_props;
6026db45
AK
1889}
1890
1891static const TypeInfo spapr_machine_2_1_info = {
1892 .name = TYPE_SPAPR_MACHINE "2.1",
1893 .parent = TYPE_SPAPR_MACHINE,
1894 .class_init = spapr_machine_2_1_class_init,
b0e966d0 1895 .instance_init = spapr_machine_2_1_instance_init,
6026db45
AK
1896};
1897
4aee7362
DG
1898static void spapr_machine_2_2_class_init(ObjectClass *oc, void *data)
1899{
b194df47 1900 static GlobalProperty compat_props[] = {
dd754baf 1901 SPAPR_COMPAT_2_2
b194df47
AK
1902 { /* end of list */ }
1903 };
4aee7362
DG
1904 MachineClass *mc = MACHINE_CLASS(oc);
1905
1906 mc->name = "pseries-2.2";
1907 mc->desc = "pSeries Logical Partition (PAPR compliant) v2.2";
b194df47 1908 mc->compat_props = compat_props;
4aee7362
DG
1909}
1910
1911static const TypeInfo spapr_machine_2_2_info = {
1912 .name = TYPE_SPAPR_MACHINE "2.2",
1913 .parent = TYPE_SPAPR_MACHINE,
1914 .class_init = spapr_machine_2_2_class_init,
b0e966d0 1915 .instance_init = spapr_machine_2_2_instance_init,
4aee7362
DG
1916};
1917
3dab0244
AK
1918static void spapr_machine_2_3_class_init(ObjectClass *oc, void *data)
1919{
a1a45612 1920 static GlobalProperty compat_props[] = {
7619c7b0 1921 SPAPR_COMPAT_2_3
a1a45612
DG
1922 { /* end of list */ }
1923 };
3dab0244
AK
1924 MachineClass *mc = MACHINE_CLASS(oc);
1925
1926 mc->name = "pseries-2.3";
1927 mc->desc = "pSeries Logical Partition (PAPR compliant) v2.3";
a1a45612 1928 mc->compat_props = compat_props;
3dab0244
AK
1929}
1930
1931static const TypeInfo spapr_machine_2_3_info = {
1932 .name = TYPE_SPAPR_MACHINE "2.3",
1933 .parent = TYPE_SPAPR_MACHINE,
1934 .class_init = spapr_machine_2_3_class_init,
d25228e7
JW
1935 .instance_init = spapr_machine_2_3_instance_init,
1936};
1937
1938static void spapr_machine_2_4_class_init(ObjectClass *oc, void *data)
1939{
1940 MachineClass *mc = MACHINE_CLASS(oc);
1941
1942 mc->name = "pseries-2.4";
1943 mc->desc = "pSeries Logical Partition (PAPR compliant) v2.4";
1944 mc->alias = "pseries";
1945 mc->is_default = 1;
1946}
1947
1948static const TypeInfo spapr_machine_2_4_info = {
1949 .name = TYPE_SPAPR_MACHINE "2.4",
1950 .parent = TYPE_SPAPR_MACHINE,
1951 .class_init = spapr_machine_2_4_class_init,
3dab0244
AK
1952};
1953
29ee3247 1954static void spapr_machine_register_types(void)
9fdf0c29 1955{
29ee3247 1956 type_register_static(&spapr_machine_info);
6026db45 1957 type_register_static(&spapr_machine_2_1_info);
4aee7362 1958 type_register_static(&spapr_machine_2_2_info);
3dab0244 1959 type_register_static(&spapr_machine_2_3_info);
d25228e7 1960 type_register_static(&spapr_machine_2_4_info);
9fdf0c29
DG
1961}
1962
29ee3247 1963type_init(spapr_machine_register_types)