]> git.proxmox.com Git - mirror_qemu.git/blobdiff - hw/ppc/spapr.c
sysbus: Convert to sysbus_realize() etc. with Coccinelle
[mirror_qemu.git] / hw / ppc / spapr.c
index c03ce6afb93f407276e6315dea090d6c20972a9b..c381bb6fb5e9dc4c213ae48922ab486132947f3a 100644 (file)
  *
  * We load our kernel at 4M, leaving space for SLOF initial image
  */
-#define FDT_MAX_SIZE            0x100000
 #define RTAS_MAX_ADDR           0x80000000 /* RTAS must stay below that */
 #define FW_MAX_SIZE             0x400000
 #define FW_FILE_NAME            "slof.bin"
 #define FW_OVERHEAD             0x2800000
 #define KERNEL_LOAD_ADDR        FW_MAX_SIZE
 
-#define MIN_RMA_SLOF            128UL
+#define MIN_RMA_SLOF            (128 * MiB)
 
 #define PHANDLE_INTC            0x00001111
 
@@ -217,10 +216,9 @@ static int spapr_fixup_cpu_numa_dt(void *fdt, int offset, PowerPCCPU *cpu)
                           sizeof(associativity));
 }
 
-/* Populate the "ibm,pa-features" property */
-static void spapr_populate_pa_features(SpaprMachineState *spapr,
-                                       PowerPCCPU *cpu,
-                                       void *fdt, int offset)
+static void spapr_dt_pa_features(SpaprMachineState *spapr,
+                                 PowerPCCPU *cpu,
+                                 void *fdt, int offset)
 {
     uint8_t pa_features_206[] = { 6, 0,
         0xf6, 0x1f, 0xc7, 0x00, 0x80, 0xc0 };
@@ -315,8 +313,8 @@ static void add_str(GString *s, const gchar *s1)
     g_string_append_len(s, s1, strlen(s1) + 1);
 }
 
-static int spapr_populate_memory_node(void *fdt, int nodeid, hwaddr start,
-                                       hwaddr size)
+static int spapr_dt_memory_node(void *fdt, int nodeid, hwaddr start,
+                                hwaddr size)
 {
     uint32_t associativity[] = {
         cpu_to_be32(0x4), /* length */
@@ -341,257 +339,6 @@ static int spapr_populate_memory_node(void *fdt, int nodeid, hwaddr start,
     return off;
 }
 
-static int spapr_populate_memory(SpaprMachineState *spapr, void *fdt)
-{
-    MachineState *machine = MACHINE(spapr);
-    hwaddr mem_start, node_size;
-    int i, nb_nodes = machine->numa_state->num_nodes;
-    NodeInfo *nodes = machine->numa_state->nodes;
-
-    for (i = 0, mem_start = 0; i < nb_nodes; ++i) {
-        if (!nodes[i].node_mem) {
-            continue;
-        }
-        if (mem_start >= machine->ram_size) {
-            node_size = 0;
-        } else {
-            node_size = nodes[i].node_mem;
-            if (node_size > machine->ram_size - mem_start) {
-                node_size = machine->ram_size - mem_start;
-            }
-        }
-        if (!mem_start) {
-            /* spapr_machine_init() checks for rma_size <= node0_size
-             * already */
-            spapr_populate_memory_node(fdt, i, 0, spapr->rma_size);
-            mem_start += spapr->rma_size;
-            node_size -= spapr->rma_size;
-        }
-        for ( ; node_size; ) {
-            hwaddr sizetmp = pow2floor(node_size);
-
-            /* mem_start != 0 here */
-            if (ctzl(mem_start) < ctzl(sizetmp)) {
-                sizetmp = 1ULL << ctzl(mem_start);
-            }
-
-            spapr_populate_memory_node(fdt, i, mem_start, sizetmp);
-            node_size -= sizetmp;
-            mem_start += sizetmp;
-        }
-    }
-
-    return 0;
-}
-
-static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset,
-                                  SpaprMachineState *spapr)
-{
-    MachineState *ms = MACHINE(spapr);
-    PowerPCCPU *cpu = POWERPC_CPU(cs);
-    CPUPPCState *env = &cpu->env;
-    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
-    int index = spapr_get_vcpu_id(cpu);
-    uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
-                       0xffffffff, 0xffffffff};
-    uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq()
-        : SPAPR_TIMEBASE_FREQ;
-    uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000;
-    uint32_t page_sizes_prop[64];
-    size_t page_sizes_prop_size;
-    unsigned int smp_threads = ms->smp.threads;
-    uint32_t vcpus_per_socket = smp_threads * ms->smp.cores;
-    uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
-    int compat_smt = MIN(smp_threads, ppc_compat_max_vthreads(cpu));
-    SpaprDrc *drc;
-    int drc_index;
-    uint32_t radix_AP_encodings[PPC_PAGE_SIZES_MAX_SZ];
-    int i;
-
-    drc = spapr_drc_by_id(TYPE_SPAPR_DRC_CPU, index);
-    if (drc) {
-        drc_index = spapr_drc_index(drc);
-        _FDT((fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index)));
-    }
-
-    _FDT((fdt_setprop_cell(fdt, offset, "reg", index)));
-    _FDT((fdt_setprop_string(fdt, offset, "device_type", "cpu")));
-
-    _FDT((fdt_setprop_cell(fdt, offset, "cpu-version", env->spr[SPR_PVR])));
-    _FDT((fdt_setprop_cell(fdt, offset, "d-cache-block-size",
-                           env->dcache_line_size)));
-    _FDT((fdt_setprop_cell(fdt, offset, "d-cache-line-size",
-                           env->dcache_line_size)));
-    _FDT((fdt_setprop_cell(fdt, offset, "i-cache-block-size",
-                           env->icache_line_size)));
-    _FDT((fdt_setprop_cell(fdt, offset, "i-cache-line-size",
-                           env->icache_line_size)));
-
-    if (pcc->l1_dcache_size) {
-        _FDT((fdt_setprop_cell(fdt, offset, "d-cache-size",
-                               pcc->l1_dcache_size)));
-    } else {
-        warn_report("Unknown L1 dcache size for cpu");
-    }
-    if (pcc->l1_icache_size) {
-        _FDT((fdt_setprop_cell(fdt, offset, "i-cache-size",
-                               pcc->l1_icache_size)));
-    } else {
-        warn_report("Unknown L1 icache size for cpu");
-    }
-
-    _FDT((fdt_setprop_cell(fdt, offset, "timebase-frequency", tbfreq)));
-    _FDT((fdt_setprop_cell(fdt, offset, "clock-frequency", cpufreq)));
-    _FDT((fdt_setprop_cell(fdt, offset, "slb-size", cpu->hash64_opts->slb_size)));
-    _FDT((fdt_setprop_cell(fdt, offset, "ibm,slb-size", cpu->hash64_opts->slb_size)));
-    _FDT((fdt_setprop_string(fdt, offset, "status", "okay")));
-    _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0)));
-
-    if (env->spr_cb[SPR_PURR].oea_read) {
-        _FDT((fdt_setprop_cell(fdt, offset, "ibm,purr", 1)));
-    }
-    if (env->spr_cb[SPR_SPURR].oea_read) {
-        _FDT((fdt_setprop_cell(fdt, offset, "ibm,spurr", 1)));
-    }
-
-    if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)) {
-        _FDT((fdt_setprop(fdt, offset, "ibm,processor-segment-sizes",
-                          segs, sizeof(segs))));
-    }
-
-    /* Advertise VSX (vector extensions) if available
-     *   1               == VMX / Altivec available
-     *   2               == VSX available
-     *
-     * Only CPUs for which we create core types in spapr_cpu_core.c
-     * are possible, and all of those have VMX */
-    if (spapr_get_cap(spapr, SPAPR_CAP_VSX) != 0) {
-        _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", 2)));
-    } else {
-        _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", 1)));
-    }
-
-    /* Advertise DFP (Decimal Floating Point) if available
-     *   0 / no property == no DFP
-     *   1               == DFP available */
-    if (spapr_get_cap(spapr, SPAPR_CAP_DFP) != 0) {
-        _FDT((fdt_setprop_cell(fdt, offset, "ibm,dfp", 1)));
-    }
-
-    page_sizes_prop_size = ppc_create_page_sizes_prop(cpu, page_sizes_prop,
-                                                      sizeof(page_sizes_prop));
-    if (page_sizes_prop_size) {
-        _FDT((fdt_setprop(fdt, offset, "ibm,segment-page-sizes",
-                          page_sizes_prop, page_sizes_prop_size)));
-    }
-
-    spapr_populate_pa_features(spapr, cpu, fdt, offset);
-
-    _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id",
-                           cs->cpu_index / vcpus_per_socket)));
-
-    _FDT((fdt_setprop(fdt, offset, "ibm,pft-size",
-                      pft_size_prop, sizeof(pft_size_prop))));
-
-    if (ms->numa_state->num_nodes > 1) {
-        _FDT(spapr_fixup_cpu_numa_dt(fdt, offset, cpu));
-    }
-
-    _FDT(spapr_fixup_cpu_smt_dt(fdt, offset, cpu, compat_smt));
-
-    if (pcc->radix_page_info) {
-        for (i = 0; i < pcc->radix_page_info->count; i++) {
-            radix_AP_encodings[i] =
-                cpu_to_be32(pcc->radix_page_info->entries[i]);
-        }
-        _FDT((fdt_setprop(fdt, offset, "ibm,processor-radix-AP-encodings",
-                          radix_AP_encodings,
-                          pcc->radix_page_info->count *
-                          sizeof(radix_AP_encodings[0]))));
-    }
-
-    /*
-     * We set this property to let the guest know that it can use the large
-     * decrementer and its width in bits.
-     */
-    if (spapr_get_cap(spapr, SPAPR_CAP_LARGE_DECREMENTER) != SPAPR_CAP_OFF)
-        _FDT((fdt_setprop_u32(fdt, offset, "ibm,dec-bits",
-                              pcc->lrg_decr_bits)));
-}
-
-static void spapr_populate_cpus_dt_node(void *fdt, SpaprMachineState *spapr)
-{
-    CPUState **rev;
-    CPUState *cs;
-    int n_cpus;
-    int cpus_offset;
-    char *nodename;
-    int i;
-
-    cpus_offset = fdt_add_subnode(fdt, 0, "cpus");
-    _FDT(cpus_offset);
-    _FDT((fdt_setprop_cell(fdt, cpus_offset, "#address-cells", 0x1)));
-    _FDT((fdt_setprop_cell(fdt, cpus_offset, "#size-cells", 0x0)));
-
-    /*
-     * We walk the CPUs in reverse order to ensure that CPU DT nodes
-     * created by fdt_add_subnode() end up in the right order in FDT
-     * for the guest kernel the enumerate the CPUs correctly.
-     *
-     * The CPU list cannot be traversed in reverse order, so we need
-     * to do extra work.
-     */
-    n_cpus = 0;
-    rev = NULL;
-    CPU_FOREACH(cs) {
-        rev = g_renew(CPUState *, rev, n_cpus + 1);
-        rev[n_cpus++] = cs;
-    }
-
-    for (i = n_cpus - 1; i >= 0; i--) {
-        CPUState *cs = rev[i];
-        PowerPCCPU *cpu = POWERPC_CPU(cs);
-        int index = spapr_get_vcpu_id(cpu);
-        DeviceClass *dc = DEVICE_GET_CLASS(cs);
-        int offset;
-
-        if (!spapr_is_thread0_in_vcore(spapr, cpu)) {
-            continue;
-        }
-
-        nodename = g_strdup_printf("%s@%x", dc->fw_name, index);
-        offset = fdt_add_subnode(fdt, cpus_offset, nodename);
-        g_free(nodename);
-        _FDT(offset);
-        spapr_populate_cpu_dt(cs, fdt, offset, spapr);
-    }
-
-    g_free(rev);
-}
-
-static int spapr_rng_populate_dt(void *fdt)
-{
-    int node;
-    int ret;
-
-    node = qemu_fdt_add_subnode(fdt, "/ibm,platform-facilities");
-    if (node <= 0) {
-        return -1;
-    }
-    ret = fdt_setprop_string(fdt, node, "device_type",
-                             "ibm,platform-facilities");
-    ret |= fdt_setprop_cell(fdt, node, "#address-cells", 0x1);
-    ret |= fdt_setprop_cell(fdt, node, "#size-cells", 0x0);
-
-    node = fdt_add_subnode(fdt, node, "ibm,random-v1");
-    if (node <= 0) {
-        return -1;
-    }
-    ret |= fdt_setprop_string(fdt, node, "compatible", "ibm,random");
-
-    return ret ? -1 : 0;
-}
-
 static uint32_t spapr_pc_dimm_node(MemoryDeviceInfoList *list, ram_addr_t addr)
 {
     MemoryDeviceInfoList *info;
@@ -642,9 +389,8 @@ spapr_get_drconf_cell(uint32_t seq_lmbs, uint64_t base_addr,
     return elem;
 }
 
-/* ibm,dynamic-memory-v2 */
-static int spapr_populate_drmem_v2(SpaprMachineState *spapr, void *fdt,
-                                   int offset, MemoryDeviceInfoList *dimms)
+static int spapr_dt_dynamic_memory_v2(SpaprMachineState *spapr, void *fdt,
+                                      int offset, MemoryDeviceInfoList *dimms)
 {
     MachineState *machine = MACHINE(spapr);
     uint8_t *int_buf, *cur_index;
@@ -699,7 +445,8 @@ static int spapr_populate_drmem_v2(SpaprMachineState *spapr, void *fdt,
         g_assert(drc);
         elem = spapr_get_drconf_cell(size / lmb_size, addr,
                                      spapr_drc_index(drc), node,
-                                     SPAPR_LMB_FLAGS_ASSIGNED);
+                                     (SPAPR_LMB_FLAGS_ASSIGNED |
+                                      SPAPR_LMB_FLAGS_HOTREMOVABLE));
         QSIMPLEQ_INSERT_TAIL(&drconf_queue, elem, entry);
         nr_entries++;
         cur_addr = addr + size;
@@ -735,8 +482,7 @@ static int spapr_populate_drmem_v2(SpaprMachineState *spapr, void *fdt,
     return 0;
 }
 
-/* ibm,dynamic-memory */
-static int spapr_populate_drmem_v1(SpaprMachineState *spapr, void *fdt,
+static int spapr_dt_dynamic_memory(SpaprMachineState *spapr, void *fdt,
                                    int offset, MemoryDeviceInfoList *dimms)
 {
     MachineState *machine = MACHINE(spapr);
@@ -790,117 +536,355 @@ static int spapr_populate_drmem_v1(SpaprMachineState *spapr, void *fdt,
                                             SPAPR_LMB_FLAGS_DRC_INVALID);
         }
 
-        cur_index += SPAPR_DR_LMB_LIST_ENTRY_SIZE;
-    }
-    ret = fdt_setprop(fdt, offset, "ibm,dynamic-memory", int_buf, buf_len);
-    g_free(int_buf);
-    if (ret < 0) {
-        return -1;
+        cur_index += SPAPR_DR_LMB_LIST_ENTRY_SIZE;
+    }
+    ret = fdt_setprop(fdt, offset, "ibm,dynamic-memory", int_buf, buf_len);
+    g_free(int_buf);
+    if (ret < 0) {
+        return -1;
+    }
+    return 0;
+}
+
+/*
+ * Adds ibm,dynamic-reconfiguration-memory node.
+ * Refer to docs/specs/ppc-spapr-hotplug.txt for the documentation
+ * of this device tree node.
+ */
+static int spapr_dt_dynamic_reconfiguration_memory(SpaprMachineState *spapr,
+                                                   void *fdt)
+{
+    MachineState *machine = MACHINE(spapr);
+    int nb_numa_nodes = machine->numa_state->num_nodes;
+    int ret, i, offset;
+    uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE;
+    uint32_t prop_lmb_size[] = {0, cpu_to_be32(lmb_size)};
+    uint32_t *int_buf, *cur_index, buf_len;
+    int nr_nodes = nb_numa_nodes ? nb_numa_nodes : 1;
+    MemoryDeviceInfoList *dimms = NULL;
+
+    /*
+     * Don't create the node if there is no device memory
+     */
+    if (machine->ram_size == machine->maxram_size) {
+        return 0;
+    }
+
+    offset = fdt_add_subnode(fdt, 0, "ibm,dynamic-reconfiguration-memory");
+
+    ret = fdt_setprop(fdt, offset, "ibm,lmb-size", prop_lmb_size,
+                    sizeof(prop_lmb_size));
+    if (ret < 0) {
+        return ret;
+    }
+
+    ret = fdt_setprop_cell(fdt, offset, "ibm,memory-flags-mask", 0xff);
+    if (ret < 0) {
+        return ret;
+    }
+
+    ret = fdt_setprop_cell(fdt, offset, "ibm,memory-preservation-time", 0x0);
+    if (ret < 0) {
+        return ret;
+    }
+
+    /* ibm,dynamic-memory or ibm,dynamic-memory-v2 */
+    dimms = qmp_memory_device_list();
+    if (spapr_ovec_test(spapr->ov5_cas, OV5_DRMEM_V2)) {
+        ret = spapr_dt_dynamic_memory_v2(spapr, fdt, offset, dimms);
+    } else {
+        ret = spapr_dt_dynamic_memory(spapr, fdt, offset, dimms);
+    }
+    qapi_free_MemoryDeviceInfoList(dimms);
+
+    if (ret < 0) {
+        return ret;
+    }
+
+    /* ibm,associativity-lookup-arrays */
+    buf_len = (nr_nodes * 4 + 2) * sizeof(uint32_t);
+    cur_index = int_buf = g_malloc0(buf_len);
+    int_buf[0] = cpu_to_be32(nr_nodes);
+    int_buf[1] = cpu_to_be32(4); /* Number of entries per associativity list */
+    cur_index += 2;
+    for (i = 0; i < nr_nodes; i++) {
+        uint32_t associativity[] = {
+            cpu_to_be32(0x0),
+            cpu_to_be32(0x0),
+            cpu_to_be32(0x0),
+            cpu_to_be32(i)
+        };
+        memcpy(cur_index, associativity, sizeof(associativity));
+        cur_index += 4;
+    }
+    ret = fdt_setprop(fdt, offset, "ibm,associativity-lookup-arrays", int_buf,
+            (cur_index - int_buf) * sizeof(uint32_t));
+    g_free(int_buf);
+
+    return ret;
+}
+
+static int spapr_dt_memory(SpaprMachineState *spapr, void *fdt)
+{
+    MachineState *machine = MACHINE(spapr);
+    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
+    hwaddr mem_start, node_size;
+    int i, nb_nodes = machine->numa_state->num_nodes;
+    NodeInfo *nodes = machine->numa_state->nodes;
+
+    for (i = 0, mem_start = 0; i < nb_nodes; ++i) {
+        if (!nodes[i].node_mem) {
+            continue;
+        }
+        if (mem_start >= machine->ram_size) {
+            node_size = 0;
+        } else {
+            node_size = nodes[i].node_mem;
+            if (node_size > machine->ram_size - mem_start) {
+                node_size = machine->ram_size - mem_start;
+            }
+        }
+        if (!mem_start) {
+            /* spapr_machine_init() checks for rma_size <= node0_size
+             * already */
+            spapr_dt_memory_node(fdt, i, 0, spapr->rma_size);
+            mem_start += spapr->rma_size;
+            node_size -= spapr->rma_size;
+        }
+        for ( ; node_size; ) {
+            hwaddr sizetmp = pow2floor(node_size);
+
+            /* mem_start != 0 here */
+            if (ctzl(mem_start) < ctzl(sizetmp)) {
+                sizetmp = 1ULL << ctzl(mem_start);
+            }
+
+            spapr_dt_memory_node(fdt, i, mem_start, sizetmp);
+            node_size -= sizetmp;
+            mem_start += sizetmp;
+        }
+    }
+
+    /* Generate ibm,dynamic-reconfiguration-memory node if required */
+    if (spapr_ovec_test(spapr->ov5_cas, OV5_DRCONF_MEMORY)) {
+        int ret;
+
+        g_assert(smc->dr_lmb_enabled);
+        ret = spapr_dt_dynamic_reconfiguration_memory(spapr, fdt);
+        if (ret) {
+            return ret;
+        }
+    }
+
+    return 0;
+}
+
+static void spapr_dt_cpu(CPUState *cs, void *fdt, int offset,
+                         SpaprMachineState *spapr)
+{
+    MachineState *ms = MACHINE(spapr);
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    CPUPPCState *env = &cpu->env;
+    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
+    int index = spapr_get_vcpu_id(cpu);
+    uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
+                       0xffffffff, 0xffffffff};
+    uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq()
+        : SPAPR_TIMEBASE_FREQ;
+    uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000;
+    uint32_t page_sizes_prop[64];
+    size_t page_sizes_prop_size;
+    unsigned int smp_threads = ms->smp.threads;
+    uint32_t vcpus_per_socket = smp_threads * ms->smp.cores;
+    uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
+    int compat_smt = MIN(smp_threads, ppc_compat_max_vthreads(cpu));
+    SpaprDrc *drc;
+    int drc_index;
+    uint32_t radix_AP_encodings[PPC_PAGE_SIZES_MAX_SZ];
+    int i;
+
+    drc = spapr_drc_by_id(TYPE_SPAPR_DRC_CPU, index);
+    if (drc) {
+        drc_index = spapr_drc_index(drc);
+        _FDT((fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index)));
+    }
+
+    _FDT((fdt_setprop_cell(fdt, offset, "reg", index)));
+    _FDT((fdt_setprop_string(fdt, offset, "device_type", "cpu")));
+
+    _FDT((fdt_setprop_cell(fdt, offset, "cpu-version", env->spr[SPR_PVR])));
+    _FDT((fdt_setprop_cell(fdt, offset, "d-cache-block-size",
+                           env->dcache_line_size)));
+    _FDT((fdt_setprop_cell(fdt, offset, "d-cache-line-size",
+                           env->dcache_line_size)));
+    _FDT((fdt_setprop_cell(fdt, offset, "i-cache-block-size",
+                           env->icache_line_size)));
+    _FDT((fdt_setprop_cell(fdt, offset, "i-cache-line-size",
+                           env->icache_line_size)));
+
+    if (pcc->l1_dcache_size) {
+        _FDT((fdt_setprop_cell(fdt, offset, "d-cache-size",
+                               pcc->l1_dcache_size)));
+    } else {
+        warn_report("Unknown L1 dcache size for cpu");
+    }
+    if (pcc->l1_icache_size) {
+        _FDT((fdt_setprop_cell(fdt, offset, "i-cache-size",
+                               pcc->l1_icache_size)));
+    } else {
+        warn_report("Unknown L1 icache size for cpu");
+    }
+
+    _FDT((fdt_setprop_cell(fdt, offset, "timebase-frequency", tbfreq)));
+    _FDT((fdt_setprop_cell(fdt, offset, "clock-frequency", cpufreq)));
+    _FDT((fdt_setprop_cell(fdt, offset, "slb-size", cpu->hash64_opts->slb_size)));
+    _FDT((fdt_setprop_cell(fdt, offset, "ibm,slb-size", cpu->hash64_opts->slb_size)));
+    _FDT((fdt_setprop_string(fdt, offset, "status", "okay")));
+    _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0)));
+
+    if (env->spr_cb[SPR_PURR].oea_read) {
+        _FDT((fdt_setprop_cell(fdt, offset, "ibm,purr", 1)));
+    }
+    if (env->spr_cb[SPR_SPURR].oea_read) {
+        _FDT((fdt_setprop_cell(fdt, offset, "ibm,spurr", 1)));
+    }
+
+    if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)) {
+        _FDT((fdt_setprop(fdt, offset, "ibm,processor-segment-sizes",
+                          segs, sizeof(segs))));
+    }
+
+    /* Advertise VSX (vector extensions) if available
+     *   1               == VMX / Altivec available
+     *   2               == VSX available
+     *
+     * Only CPUs for which we create core types in spapr_cpu_core.c
+     * are possible, and all of those have VMX */
+    if (spapr_get_cap(spapr, SPAPR_CAP_VSX) != 0) {
+        _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", 2)));
+    } else {
+        _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", 1)));
+    }
+
+    /* Advertise DFP (Decimal Floating Point) if available
+     *   0 / no property == no DFP
+     *   1               == DFP available */
+    if (spapr_get_cap(spapr, SPAPR_CAP_DFP) != 0) {
+        _FDT((fdt_setprop_cell(fdt, offset, "ibm,dfp", 1)));
+    }
+
+    page_sizes_prop_size = ppc_create_page_sizes_prop(cpu, page_sizes_prop,
+                                                      sizeof(page_sizes_prop));
+    if (page_sizes_prop_size) {
+        _FDT((fdt_setprop(fdt, offset, "ibm,segment-page-sizes",
+                          page_sizes_prop, page_sizes_prop_size)));
+    }
+
+    spapr_dt_pa_features(spapr, cpu, fdt, offset);
+
+    _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id",
+                           cs->cpu_index / vcpus_per_socket)));
+
+    _FDT((fdt_setprop(fdt, offset, "ibm,pft-size",
+                      pft_size_prop, sizeof(pft_size_prop))));
+
+    if (ms->numa_state->num_nodes > 1) {
+        _FDT(spapr_fixup_cpu_numa_dt(fdt, offset, cpu));
     }
-    return 0;
-}
 
-/*
- * Adds ibm,dynamic-reconfiguration-memory node.
- * Refer to docs/specs/ppc-spapr-hotplug.txt for the documentation
- * of this device tree node.
- */
-static int spapr_populate_drconf_memory(SpaprMachineState *spapr, void *fdt)
-{
-    MachineState *machine = MACHINE(spapr);
-    int nb_numa_nodes = machine->numa_state->num_nodes;
-    int ret, i, offset;
-    uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE;
-    uint32_t prop_lmb_size[] = {0, cpu_to_be32(lmb_size)};
-    uint32_t *int_buf, *cur_index, buf_len;
-    int nr_nodes = nb_numa_nodes ? nb_numa_nodes : 1;
-    MemoryDeviceInfoList *dimms = NULL;
+    _FDT(spapr_fixup_cpu_smt_dt(fdt, offset, cpu, compat_smt));
 
-    /*
-     * Don't create the node if there is no device memory
-     */
-    if (machine->ram_size == machine->maxram_size) {
-        return 0;
+    if (pcc->radix_page_info) {
+        for (i = 0; i < pcc->radix_page_info->count; i++) {
+            radix_AP_encodings[i] =
+                cpu_to_be32(pcc->radix_page_info->entries[i]);
+        }
+        _FDT((fdt_setprop(fdt, offset, "ibm,processor-radix-AP-encodings",
+                          radix_AP_encodings,
+                          pcc->radix_page_info->count *
+                          sizeof(radix_AP_encodings[0]))));
     }
 
-    offset = fdt_add_subnode(fdt, 0, "ibm,dynamic-reconfiguration-memory");
+    /*
+     * We set this property to let the guest know that it can use the large
+     * decrementer and its width in bits.
+     */
+    if (spapr_get_cap(spapr, SPAPR_CAP_LARGE_DECREMENTER) != SPAPR_CAP_OFF)
+        _FDT((fdt_setprop_u32(fdt, offset, "ibm,dec-bits",
+                              pcc->lrg_decr_bits)));
+}
 
-    ret = fdt_setprop(fdt, offset, "ibm,lmb-size", prop_lmb_size,
-                    sizeof(prop_lmb_size));
-    if (ret < 0) {
-        return ret;
-    }
+static void spapr_dt_cpus(void *fdt, SpaprMachineState *spapr)
+{
+    CPUState **rev;
+    CPUState *cs;
+    int n_cpus;
+    int cpus_offset;
+    char *nodename;
+    int i;
 
-    ret = fdt_setprop_cell(fdt, offset, "ibm,memory-flags-mask", 0xff);
-    if (ret < 0) {
-        return ret;
-    }
+    cpus_offset = fdt_add_subnode(fdt, 0, "cpus");
+    _FDT(cpus_offset);
+    _FDT((fdt_setprop_cell(fdt, cpus_offset, "#address-cells", 0x1)));
+    _FDT((fdt_setprop_cell(fdt, cpus_offset, "#size-cells", 0x0)));
 
-    ret = fdt_setprop_cell(fdt, offset, "ibm,memory-preservation-time", 0x0);
-    if (ret < 0) {
-        return ret;
+    /*
+     * We walk the CPUs in reverse order to ensure that CPU DT nodes
+     * created by fdt_add_subnode() end up in the right order in FDT
+     * for the guest kernel the enumerate the CPUs correctly.
+     *
+     * The CPU list cannot be traversed in reverse order, so we need
+     * to do extra work.
+     */
+    n_cpus = 0;
+    rev = NULL;
+    CPU_FOREACH(cs) {
+        rev = g_renew(CPUState *, rev, n_cpus + 1);
+        rev[n_cpus++] = cs;
     }
 
-    /* ibm,dynamic-memory or ibm,dynamic-memory-v2 */
-    dimms = qmp_memory_device_list();
-    if (spapr_ovec_test(spapr->ov5_cas, OV5_DRMEM_V2)) {
-        ret = spapr_populate_drmem_v2(spapr, fdt, offset, dimms);
-    } else {
-        ret = spapr_populate_drmem_v1(spapr, fdt, offset, dimms);
-    }
-    qapi_free_MemoryDeviceInfoList(dimms);
+    for (i = n_cpus - 1; i >= 0; i--) {
+        CPUState *cs = rev[i];
+        PowerPCCPU *cpu = POWERPC_CPU(cs);
+        int index = spapr_get_vcpu_id(cpu);
+        DeviceClass *dc = DEVICE_GET_CLASS(cs);
+        int offset;
 
-    if (ret < 0) {
-        return ret;
-    }
+        if (!spapr_is_thread0_in_vcore(spapr, cpu)) {
+            continue;
+        }
 
-    /* ibm,associativity-lookup-arrays */
-    buf_len = (nr_nodes * 4 + 2) * sizeof(uint32_t);
-    cur_index = int_buf = g_malloc0(buf_len);
-    int_buf[0] = cpu_to_be32(nr_nodes);
-    int_buf[1] = cpu_to_be32(4); /* Number of entries per associativity list */
-    cur_index += 2;
-    for (i = 0; i < nr_nodes; i++) {
-        uint32_t associativity[] = {
-            cpu_to_be32(0x0),
-            cpu_to_be32(0x0),
-            cpu_to_be32(0x0),
-            cpu_to_be32(i)
-        };
-        memcpy(cur_index, associativity, sizeof(associativity));
-        cur_index += 4;
+        nodename = g_strdup_printf("%s@%x", dc->fw_name, index);
+        offset = fdt_add_subnode(fdt, cpus_offset, nodename);
+        g_free(nodename);
+        _FDT(offset);
+        spapr_dt_cpu(cs, fdt, offset, spapr);
     }
-    ret = fdt_setprop(fdt, offset, "ibm,associativity-lookup-arrays", int_buf,
-            (cur_index - int_buf) * sizeof(uint32_t));
-    g_free(int_buf);
 
-    return ret;
+    g_free(rev);
 }
 
-static int spapr_dt_cas_updates(SpaprMachineState *spapr, void *fdt,
-                                SpaprOptionVector *ov5_updates)
+static int spapr_dt_rng(void *fdt)
 {
-    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
-    int ret = 0, offset;
+    int node;
+    int ret;
 
-    /* Generate ibm,dynamic-reconfiguration-memory node if required */
-    if (spapr_ovec_test(ov5_updates, OV5_DRCONF_MEMORY)) {
-        g_assert(smc->dr_lmb_enabled);
-        ret = spapr_populate_drconf_memory(spapr, fdt);
-        if (ret) {
-            return ret;
-        }
+    node = qemu_fdt_add_subnode(fdt, "/ibm,platform-facilities");
+    if (node <= 0) {
+        return -1;
     }
+    ret = fdt_setprop_string(fdt, node, "device_type",
+                             "ibm,platform-facilities");
+    ret |= fdt_setprop_cell(fdt, node, "#address-cells", 0x1);
+    ret |= fdt_setprop_cell(fdt, node, "#size-cells", 0x0);
 
-    offset = fdt_path_offset(fdt, "/chosen");
-    if (offset < 0) {
-        offset = fdt_add_subnode(fdt, 0, "chosen");
-        if (offset < 0) {
-            return offset;
-        }
+    node = fdt_add_subnode(fdt, node, "ibm,random-v1");
+    if (node <= 0) {
+        return -1;
     }
-    return spapr_ovec_populate_dt(fdt, offset, spapr->ov5_cas,
-                                  "ibm,architecture-vec-5");
+    ret |= fdt_setprop_string(fdt, node, "compatible", "ibm,random");
+
+    return ret ? -1 : 0;
 }
 
 static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt)
@@ -967,6 +951,29 @@ static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt)
     _FDT(fdt_setprop(fdt, rtas, "ibm,max-associativity-domains",
                      maxdomains, sizeof(maxdomains)));
 
+    /*
+     * FWNMI reserves RTAS_ERROR_LOG_MAX for the machine check error log,
+     * and 16 bytes per CPU for system reset error log plus an extra 8 bytes.
+     *
+     * The system reset requirements are driven by existing Linux and PowerVM
+     * implementation which (contrary to PAPR) saves r3 in the error log
+     * structure like machine check, so Linux expects to find the saved r3
+     * value at the address in r3 upon FWNMI-enabled sreset interrupt (and
+     * does not look at the error value).
+     *
+     * System reset interrupts are not subject to interlock like machine
+     * check, so this memory area could be corrupted if the sreset is
+     * interrupted by a machine check (or vice versa) if it was shared. To
+     * prevent this, system reset uses per-CPU areas for the sreset save
+     * area. A system reset that interrupts a system reset handler could
+     * still overwrite this area, but Linux doesn't try to recover in that
+     * case anyway.
+     *
+     * The extra 8 bytes is required because Linux's FWNMI error log check
+     * is off-by-one.
+     */
+    _FDT(fdt_setprop_cell(fdt, rtas, "rtas-size", RTAS_ERROR_LOG_MAX +
+                         ms->smp.max_cpus * sizeof(uint64_t)*2 + sizeof(uint64_t)));
     _FDT(fdt_setprop_cell(fdt, rtas, "rtas-error-log-max",
                           RTAS_ERROR_LOG_MAX));
     _FDT(fdt_setprop_cell(fdt, rtas, "rtas-event-scan-rate",
@@ -1040,81 +1047,91 @@ static void spapr_dt_ov5_platform_support(SpaprMachineState *spapr, void *fdt,
                      val, sizeof(val)));
 }
 
-static void spapr_dt_chosen(SpaprMachineState *spapr, void *fdt)
+static void spapr_dt_chosen(SpaprMachineState *spapr, void *fdt, bool reset)
 {
     MachineState *machine = MACHINE(spapr);
     SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine);
     int chosen;
-    const char *boot_device = machine->boot_order;
-    char *stdout_path = spapr_vio_stdout_path(spapr->vio_bus);
-    size_t cb = 0;
-    char *bootlist = get_boot_devices_list(&cb);
 
     _FDT(chosen = fdt_add_subnode(fdt, 0, "chosen"));
 
-    if (machine->kernel_cmdline && machine->kernel_cmdline[0]) {
-        _FDT(fdt_setprop_string(fdt, chosen, "bootargs",
-                                machine->kernel_cmdline));
-    }
-    if (spapr->initrd_size) {
-        _FDT(fdt_setprop_cell(fdt, chosen, "linux,initrd-start",
-                              spapr->initrd_base));
-        _FDT(fdt_setprop_cell(fdt, chosen, "linux,initrd-end",
-                              spapr->initrd_base + spapr->initrd_size));
-    }
+    if (reset) {
+        const char *boot_device = machine->boot_order;
+        char *stdout_path = spapr_vio_stdout_path(spapr->vio_bus);
+        size_t cb = 0;
+        char *bootlist = get_boot_devices_list(&cb);
+
+        if (machine->kernel_cmdline && machine->kernel_cmdline[0]) {
+            _FDT(fdt_setprop_string(fdt, chosen, "bootargs",
+                                    machine->kernel_cmdline));
+        }
+
+        if (spapr->initrd_size) {
+            _FDT(fdt_setprop_cell(fdt, chosen, "linux,initrd-start",
+                                  spapr->initrd_base));
+            _FDT(fdt_setprop_cell(fdt, chosen, "linux,initrd-end",
+                                  spapr->initrd_base + spapr->initrd_size));
+        }
 
-    if (spapr->kernel_size) {
-        uint64_t kprop[2] = { cpu_to_be64(spapr->kernel_addr),
-                              cpu_to_be64(spapr->kernel_size) };
+        if (spapr->kernel_size) {
+            uint64_t kprop[2] = { cpu_to_be64(spapr->kernel_addr),
+                                  cpu_to_be64(spapr->kernel_size) };
 
-        _FDT(fdt_setprop(fdt, chosen, "qemu,boot-kernel",
+            _FDT(fdt_setprop(fdt, chosen, "qemu,boot-kernel",
                          &kprop, sizeof(kprop)));
-        if (spapr->kernel_le) {
-            _FDT(fdt_setprop(fdt, chosen, "qemu,boot-kernel-le", NULL, 0));
+            if (spapr->kernel_le) {
+                _FDT(fdt_setprop(fdt, chosen, "qemu,boot-kernel-le", NULL, 0));
+            }
         }
-    }
-    if (boot_menu) {
-        _FDT((fdt_setprop_cell(fdt, chosen, "qemu,boot-menu", boot_menu)));
-    }
-    _FDT(fdt_setprop_cell(fdt, chosen, "qemu,graphic-width", graphic_width));
-    _FDT(fdt_setprop_cell(fdt, chosen, "qemu,graphic-height", graphic_height));
-    _FDT(fdt_setprop_cell(fdt, chosen, "qemu,graphic-depth", graphic_depth));
+        if (boot_menu) {
+            _FDT((fdt_setprop_cell(fdt, chosen, "qemu,boot-menu", boot_menu)));
+        }
+        _FDT(fdt_setprop_cell(fdt, chosen, "qemu,graphic-width", graphic_width));
+        _FDT(fdt_setprop_cell(fdt, chosen, "qemu,graphic-height", graphic_height));
+        _FDT(fdt_setprop_cell(fdt, chosen, "qemu,graphic-depth", graphic_depth));
 
-    if (cb && bootlist) {
-        int i;
+        if (cb && bootlist) {
+            int i;
 
-        for (i = 0; i < cb; i++) {
-            if (bootlist[i] == '\n') {
-                bootlist[i] = ' ';
+            for (i = 0; i < cb; i++) {
+                if (bootlist[i] == '\n') {
+                    bootlist[i] = ' ';
+                }
             }
+            _FDT(fdt_setprop_string(fdt, chosen, "qemu,boot-list", bootlist));
         }
-        _FDT(fdt_setprop_string(fdt, chosen, "qemu,boot-list", bootlist));
-    }
 
-    if (boot_device && strlen(boot_device)) {
-        _FDT(fdt_setprop_string(fdt, chosen, "qemu,boot-device", boot_device));
-    }
+        if (boot_device && strlen(boot_device)) {
+            _FDT(fdt_setprop_string(fdt, chosen, "qemu,boot-device", boot_device));
+        }
+
+        if (!spapr->has_graphics && stdout_path) {
+            /*
+             * "linux,stdout-path" and "stdout" properties are
+             * deprecated by linux kernel. New platforms should only
+             * use the "stdout-path" property. Set the new property
+             * and continue using older property to remain compatible
+             * with the existing firmware.
+             */
+            _FDT(fdt_setprop_string(fdt, chosen, "linux,stdout-path", stdout_path));
+            _FDT(fdt_setprop_string(fdt, chosen, "stdout-path", stdout_path));
+        }
 
-    if (!spapr->has_graphics && stdout_path) {
         /*
-         * "linux,stdout-path" and "stdout" properties are deprecated by linux
-         * kernel. New platforms should only use the "stdout-path" property. Set
-         * the new property and continue using older property to remain
-         * compatible with the existing firmware.
+         * We can deal with BAR reallocation just fine, advertise it
+         * to the guest
          */
-        _FDT(fdt_setprop_string(fdt, chosen, "linux,stdout-path", stdout_path));
-        _FDT(fdt_setprop_string(fdt, chosen, "stdout-path", stdout_path));
-    }
+        if (smc->linux_pci_probe) {
+            _FDT(fdt_setprop_cell(fdt, chosen, "linux,pci-probe-only", 0));
+        }
 
-    /* We can deal with BAR reallocation just fine, advertise it to the guest */
-    if (smc->linux_pci_probe) {
-        _FDT(fdt_setprop_cell(fdt, chosen, "linux,pci-probe-only", 0));
-    }
+        spapr_dt_ov5_platform_support(spapr, fdt, chosen);
 
-    spapr_dt_ov5_platform_support(spapr, fdt, chosen);
+        g_free(stdout_path);
+        g_free(bootlist);
+    }
 
-    g_free(stdout_path);
-    g_free(bootlist);
+    _FDT(spapr_dt_ovec(fdt, chosen, spapr->ov5_cas, "ibm,architecture-vec-5"));
 }
 
 static void spapr_dt_hypervisor(SpaprMachineState *spapr, void *fdt)
@@ -1192,7 +1209,7 @@ void *spapr_build_fdt(SpaprMachineState *spapr, bool reset, size_t space)
     /* /interrupt controller */
     spapr_irq_dt(spapr, spapr_max_server_number(spapr), fdt, PHANDLE_INTC);
 
-    ret = spapr_populate_memory(spapr, fdt);
+    ret = spapr_dt_memory(spapr, fdt);
     if (ret < 0) {
         error_report("couldn't setup memory nodes in fdt");
         exit(1);
@@ -1202,7 +1219,7 @@ void *spapr_build_fdt(SpaprMachineState *spapr, bool reset, size_t space)
     spapr_dt_vdevice(spapr->vio_bus, fdt);
 
     if (object_resolve_path_type("", TYPE_SPAPR_RNG, NULL)) {
-        ret = spapr_rng_populate_dt(fdt);
+        ret = spapr_dt_rng(fdt);
         if (ret < 0) {
             error_report("could not set up rng device in the fdt");
             exit(1);
@@ -1217,8 +1234,7 @@ void *spapr_build_fdt(SpaprMachineState *spapr, bool reset, size_t space)
         }
     }
 
-    /* cpus */
-    spapr_populate_cpus_dt_node(fdt, spapr);
+    spapr_dt_cpus(fdt, spapr);
 
     if (smc->dr_lmb_enabled) {
         _FDT(spapr_dt_drc(fdt, 0, NULL, SPAPR_DR_CONNECTOR_TYPE_LMB));
@@ -1240,9 +1256,7 @@ void *spapr_build_fdt(SpaprMachineState *spapr, bool reset, size_t space)
     spapr_dt_rtas(spapr, fdt);
 
     /* /chosen */
-    if (reset) {
-        spapr_dt_chosen(spapr, fdt);
-    }
+    spapr_dt_chosen(spapr, fdt, reset);
 
     /* /hypervisor */
     if (kvm_enabled()) {
@@ -1261,13 +1275,6 @@ void *spapr_build_fdt(SpaprMachineState *spapr, bool reset, size_t space)
         }
     }
 
-    /* ibm,client-architecture-support updates */
-    ret = spapr_dt_cas_updates(spapr, fdt, spapr->ov5_cas);
-    if (ret < 0) {
-        error_report("couldn't setup CAS properties fdt");
-        exit(1);
-    }
-
     if (smc->dr_phb_enabled) {
         ret = spapr_dt_drc(fdt, 0, NULL, SPAPR_DR_CONNECTOR_TYPE_PHB);
         if (ret < 0) {
@@ -1569,13 +1576,11 @@ void spapr_reallocate_hpt(SpaprMachineState *spapr, int shift,
     spapr_set_all_lpcrs(0, LPCR_HR | LPCR_UPRT);
 }
 
-void spapr_setup_hpt_and_vrma(SpaprMachineState *spapr)
+void spapr_setup_hpt(SpaprMachineState *spapr)
 {
     int hpt_shift;
 
-    if ((spapr->resize_hpt == SPAPR_RESIZE_HPT_DISABLED)
-        || (spapr->cas_reboot
-            && !spapr_ovec_test(spapr->ov5_cas, OV5_HPT_RESIZE))) {
+    if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DISABLED) {
         hpt_shift = spapr_hpt_shift_for_ramsize(MACHINE(spapr)->maxram_size);
     } else {
         uint64_t current_ram_size;
@@ -1585,9 +1590,16 @@ void spapr_setup_hpt_and_vrma(SpaprMachineState *spapr)
     }
     spapr_reallocate_hpt(spapr, hpt_shift, &error_fatal);
 
-    if (spapr->vrma_adjust) {
-        spapr->rma_size = kvmppc_rma_size(spapr_node0_size(MACHINE(spapr)),
-                                          spapr->htab_shift);
+    if (kvm_enabled()) {
+        hwaddr vrma_limit = kvmppc_vrma_limit(spapr->htab_shift);
+
+        /* Check our RMA fits in the possible VRMA */
+        if (vrma_limit < spapr->rma_size) {
+            error_report("Unable to create %" HWADDR_PRIu
+                         "MiB RMA (VRMA only allows %" HWADDR_PRIu "MiB",
+                         spapr->rma_size / MiB, vrma_limit / MiB);
+            exit(EXIT_FAILURE);
+        }
     }
 }
 
@@ -1627,21 +1639,15 @@ static void spapr_machine_reset(MachineState *machine)
         spapr->patb_entry = PATE1_GR;
         spapr_set_all_lpcrs(LPCR_HR | LPCR_UPRT, LPCR_HR | LPCR_UPRT);
     } else {
-        spapr_setup_hpt_and_vrma(spapr);
+        spapr_setup_hpt(spapr);
     }
 
     qemu_devices_reset();
 
-    /*
-     * If this reset wasn't generated by CAS, we should reset our
-     * negotiated options and start from scratch
-     */
-    if (!spapr->cas_reboot) {
-        spapr_ovec_cleanup(spapr->ov5_cas);
-        spapr->ov5_cas = spapr_ovec_new();
+    spapr_ovec_cleanup(spapr->ov5_cas);
+    spapr->ov5_cas = spapr_ovec_new();
 
-        ppc_set_compat_all(spapr->max_compat_pvr, &error_fatal);
-    }
+    ppc_set_compat_all(spapr->max_compat_pvr, &error_fatal);
 
     /*
      * This is fixing some of the default configuration of the XIVE
@@ -1691,23 +1697,22 @@ static void spapr_machine_reset(MachineState *machine)
     spapr->fdt_blob = fdt;
 
     /* Set up the entry state */
-    spapr_cpu_set_entry_state(first_ppc_cpu, SPAPR_ENTRY_POINT, fdt_addr);
+    spapr_cpu_set_entry_state(first_ppc_cpu, SPAPR_ENTRY_POINT, 0, fdt_addr, 0);
     first_ppc_cpu->env.gpr[5] = 0;
 
-    spapr->cas_reboot = false;
-
-    spapr->mc_status = -1;
-    spapr->guest_machine_check_addr = -1;
+    spapr->fwnmi_system_reset_addr = -1;
+    spapr->fwnmi_machine_check_addr = -1;
+    spapr->fwnmi_machine_check_interlock = -1;
 
     /* Signal all vCPUs waiting on this condition */
-    qemu_cond_broadcast(&spapr->mc_delivery_cond);
+    qemu_cond_broadcast(&spapr->fwnmi_machine_check_interlock_cond);
 
     migrate_del_blocker(spapr->fwnmi_migration_blocker);
 }
 
 static void spapr_create_nvram(SpaprMachineState *spapr)
 {
-    DeviceState *dev = qdev_create(&spapr->vio_bus->bus, "spapr-nvram");
+    DeviceState *dev = qdev_new("spapr-nvram");
     DriveInfo *dinfo = drive_get(IF_PFLASH, 0, 0);
 
     if (dinfo) {
@@ -1715,20 +1720,20 @@ static void spapr_create_nvram(SpaprMachineState *spapr)
                             &error_fatal);
     }
 
-    qdev_init_nofail(dev);
+    qdev_realize_and_unref(dev, &spapr->vio_bus->bus, &error_fatal);
 
     spapr->nvram = (struct SpaprNvram *)dev;
 }
 
 static void spapr_rtc_create(SpaprMachineState *spapr)
 {
-    object_initialize_child(OBJECT(spapr), "rtc",
-                            &spapr->rtc, sizeof(spapr->rtc), TYPE_SPAPR_RTC,
-                            &error_fatal, NULL);
+    object_initialize_child_with_props(OBJECT(spapr), "rtc", &spapr->rtc,
+                                       sizeof(spapr->rtc), TYPE_SPAPR_RTC,
+                                       &error_fatal, NULL);
     object_property_set_bool(OBJECT(&spapr->rtc), true, "realized",
                               &error_fatal);
     object_property_add_alias(OBJECT(spapr), "rtc-time", OBJECT(&spapr->rtc),
-                              "date", &error_fatal);
+                              "date");
 }
 
 /* Returns whether we want to use VGA or not */
@@ -1989,7 +1994,7 @@ static bool spapr_fwnmi_needed(void *opaque)
 {
     SpaprMachineState *spapr = (SpaprMachineState *)opaque;
 
-    return spapr->guest_machine_check_addr != -1;
+    return spapr->fwnmi_machine_check_addr != -1;
 }
 
 static int spapr_fwnmi_pre_save(void *opaque)
@@ -2000,7 +2005,7 @@ static int spapr_fwnmi_pre_save(void *opaque)
      * Check if machine check handling is in progress and print a
      * warning message.
      */
-    if (spapr->mc_status != -1) {
+    if (spapr->fwnmi_machine_check_interlock != -1) {
         warn_report("A machine check is being handled during migration. The"
                 "handler may run and log hardware error on the destination");
     }
@@ -2008,15 +2013,16 @@ static int spapr_fwnmi_pre_save(void *opaque)
     return 0;
 }
 
-static const VMStateDescription vmstate_spapr_machine_check = {
-    .name = "spapr_machine_check",
+static const VMStateDescription vmstate_spapr_fwnmi = {
+    .name = "spapr_fwnmi",
     .version_id = 1,
     .minimum_version_id = 1,
     .needed = spapr_fwnmi_needed,
     .pre_save = spapr_fwnmi_pre_save,
     .fields = (VMStateField[]) {
-        VMSTATE_UINT64(guest_machine_check_addr, SpaprMachineState),
-        VMSTATE_INT32(mc_status, SpaprMachineState),
+        VMSTATE_UINT64(fwnmi_system_reset_addr, SpaprMachineState),
+        VMSTATE_UINT64(fwnmi_machine_check_addr, SpaprMachineState),
+        VMSTATE_INT32(fwnmi_machine_check_interlock, SpaprMachineState),
         VMSTATE_END_OF_LIST()
     },
 };
@@ -2055,7 +2061,7 @@ static const VMStateDescription vmstate_spapr = {
         &vmstate_spapr_cap_large_decr,
         &vmstate_spapr_cap_ccf_assist,
         &vmstate_spapr_cap_fwnmi,
-        &vmstate_spapr_machine_check,
+        &vmstate_spapr_fwnmi,
         NULL
     }
 };
@@ -2634,13 +2640,49 @@ static PCIHostState *spapr_create_default_phb(void)
 {
     DeviceState *dev;
 
-    dev = qdev_create(NULL, TYPE_SPAPR_PCI_HOST_BRIDGE);
+    dev = qdev_new(TYPE_SPAPR_PCI_HOST_BRIDGE);
     qdev_prop_set_uint32(dev, "index", 0);
-    qdev_init_nofail(dev);
+    sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
 
     return PCI_HOST_BRIDGE(dev);
 }
 
+static hwaddr spapr_rma_size(SpaprMachineState *spapr, Error **errp)
+{
+    MachineState *machine = MACHINE(spapr);
+    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
+    hwaddr rma_size = machine->ram_size;
+    hwaddr node0_size = spapr_node0_size(machine);
+
+    /* RMA has to fit in the first NUMA node */
+    rma_size = MIN(rma_size, node0_size);
+
+    /*
+     * VRMA access is via a special 1TiB SLB mapping, so the RMA can
+     * never exceed that
+     */
+    rma_size = MIN(rma_size, 1 * TiB);
+
+    /*
+     * Clamp the RMA size based on machine type.  This is for
+     * migration compatibility with older qemu versions, which limited
+     * the RMA size for complicated and mostly bad reasons.
+     */
+    if (smc->rma_limit) {
+        rma_size = MIN(rma_size, smc->rma_limit);
+    }
+
+    if (rma_size < MIN_RMA_SLOF) {
+        error_setg(errp,
+                   "pSeries SLOF firmware requires >= %" HWADDR_PRIx
+                   "ldMiB guest RMA (Real Mode Area memory)",
+                   MIN_RMA_SLOF / MiB);
+        return 0;
+    }
+
+    return rma_size;
+}
+
 /* pSeries LPAR / sPAPR hardware init */
 static void spapr_machine_init(MachineState *machine)
 {
@@ -2652,7 +2694,6 @@ static void spapr_machine_init(MachineState *machine)
     PCIHostState *phb;
     int i;
     MemoryRegion *sysmem = get_system_memory();
-    hwaddr node0_size = spapr_node0_size(machine);
     long load_limit, fw_size;
     char *filename;
     Error *resize_hpt_err = NULL;
@@ -2692,34 +2733,7 @@ static void spapr_machine_init(MachineState *machine)
         exit(1);
     }
 
-    spapr->rma_size = node0_size;
-
-    /* With KVM, we don't actually know whether KVM supports an
-     * unbounded RMA (PR KVM) or is limited by the hash table size
-     * (HV KVM using VRMA), so we always assume the latter
-     *
-     * In that case, we also limit the initial allocations for RTAS
-     * etc... to 256M since we have no way to know what the VRMA size
-     * is going to be as it depends on the size of the hash table
-     * which isn't determined yet.
-     */
-    if (kvm_enabled()) {
-        spapr->vrma_adjust = 1;
-        spapr->rma_size = MIN(spapr->rma_size, 0x10000000);
-    }
-
-    /* Actually we don't support unbounded RMA anymore since we added
-     * proper emulation of HV mode. The max we can get is 16G which
-     * also happens to be what we configure for PAPR mode so make sure
-     * we don't do anything bigger than that
-     */
-    spapr->rma_size = MIN(spapr->rma_size, 0x400000000ull);
-
-    if (spapr->rma_size > node0_size) {
-        error_report("Numa node 0 has to span the RMA (%#08"HWADDR_PRIx")",
-                     spapr->rma_size);
-        exit(1);
-    }
+    spapr->rma_size = spapr_rma_size(spapr, &error_fatal);
 
     /* Setup a load limit for the ramdisk leaving room for SLOF and FDT */
     load_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR) - FW_OVERHEAD;
@@ -2813,6 +2827,7 @@ static void spapr_machine_init(MachineState *machine)
     if ((!kvm_enabled() || kvmppc_has_cap_mmu_radix()) &&
         ppc_type_check_compat(machine->cpu_type, CPU_POWERPC_LOGICAL_3_00, 0,
                               spapr->max_compat_pvr)) {
+        spapr_ovec_set(spapr->ov5, OV5_MMU_RADIX_300);
         /* KVM and TCG always allow GTSE with radix... */
         spapr_ovec_set(spapr->ov5, OV5_MMU_RADIX_GTSE);
     }
@@ -2869,7 +2884,7 @@ static void spapr_machine_init(MachineState *machine)
         spapr_create_lmb_dr_connectors(spapr);
     }
 
-    if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_ON) {
+    if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI) == SPAPR_CAP_ON) {
         /* Create the error string for live migration blocker */
         error_setg(&spapr->fwnmi_migration_blocker,
             "A machine check is being handled during migration. The handler"
@@ -2956,13 +2971,6 @@ static void spapr_machine_init(MachineState *machine)
         }
     }
 
-    if (spapr->rma_size < (MIN_RMA_SLOF * MiB)) {
-        error_report(
-            "pSeries SLOF firmware requires >= %ldM guest RMA (Real Mode Area memory)",
-            MIN_RMA_SLOF);
-        exit(1);
-    }
-
     if (kernel_filename) {
         uint64_t lowaddr = 0;
 
@@ -3045,7 +3053,7 @@ static void spapr_machine_init(MachineState *machine)
         kvmppc_spapr_enable_inkernel_multitce();
     }
 
-    qemu_cond_init(&spapr->mc_delivery_cond);
+    qemu_cond_init(&spapr->fwnmi_machine_check_interlock_cond);
 }
 
 static int spapr_kvm_type(MachineState *machine, const char *vm_type)
@@ -3223,30 +3231,6 @@ static void spapr_set_resize_hpt(Object *obj, const char *value, Error **errp)
     }
 }
 
-static void spapr_get_vsmt(Object *obj, Visitor *v, const char *name,
-                                   void *opaque, Error **errp)
-{
-    visit_type_uint32(v, name, (uint32_t *)opaque, errp);
-}
-
-static void spapr_set_vsmt(Object *obj, Visitor *v, const char *name,
-                                   void *opaque, Error **errp)
-{
-    visit_type_uint32(v, name, (uint32_t *)opaque, errp);
-}
-
-static void spapr_get_kernel_addr(Object *obj, Visitor *v, const char *name,
-                                  void *opaque, Error **errp)
-{
-    visit_type_uint64(v, name, (uint64_t *)opaque, errp);
-}
-
-static void spapr_set_kernel_addr(Object *obj, Visitor *v, const char *name,
-                                  void *opaque, Error **errp)
-{
-    visit_type_uint64(v, name, (uint64_t *)opaque, errp);
-}
-
 static char *spapr_get_ic_mode(Object *obj, Error **errp)
 {
     SpaprMachineState *spapr = SPAPR_MACHINE(obj);
@@ -3322,62 +3306,53 @@ static void spapr_instance_init(Object *obj)
     spapr->htab_fd = -1;
     spapr->use_hotplug_event_source = true;
     object_property_add_str(obj, "kvm-type",
-                            spapr_get_kvm_type, spapr_set_kvm_type, NULL);
+                            spapr_get_kvm_type, spapr_set_kvm_type);
     object_property_set_description(obj, "kvm-type",
-                                    "Specifies the KVM virtualization mode (HV, PR)",
-                                    NULL);
+                                    "Specifies the KVM virtualization mode (HV, PR)");
     object_property_add_bool(obj, "modern-hotplug-events",
                             spapr_get_modern_hotplug_events,
-                            spapr_set_modern_hotplug_events,
-                            NULL);
+                            spapr_set_modern_hotplug_events);
     object_property_set_description(obj, "modern-hotplug-events",
                                     "Use dedicated hotplug event mechanism in"
                                     " place of standard EPOW events when possible"
-                                    " (required for memory hot-unplug support)",
-                                    NULL);
+                                    " (required for memory hot-unplug support)");
     ppc_compat_add_property(obj, "max-cpu-compat", &spapr->max_compat_pvr,
-                            "Maximum permitted CPU compatibility mode",
-                            &error_fatal);
+                            "Maximum permitted CPU compatibility mode");
 
     object_property_add_str(obj, "resize-hpt",
-                            spapr_get_resize_hpt, spapr_set_resize_hpt, NULL);
+                            spapr_get_resize_hpt, spapr_set_resize_hpt);
     object_property_set_description(obj, "resize-hpt",
-                                    "Resizing of the Hash Page Table (enabled, disabled, required)",
-                                    NULL);
-    object_property_add(obj, "vsmt", "uint32", spapr_get_vsmt,
-                        spapr_set_vsmt, NULL, &spapr->vsmt, &error_abort);
+                                    "Resizing of the Hash Page Table (enabled, disabled, required)");
+    object_property_add_uint32_ptr(obj, "vsmt",
+                                   &spapr->vsmt, OBJ_PROP_FLAG_READWRITE);
     object_property_set_description(obj, "vsmt",
                                     "Virtual SMT: KVM behaves as if this were"
-                                    " the host's SMT mode", &error_abort);
+                                    " the host's SMT mode");
+
     object_property_add_bool(obj, "vfio-no-msix-emulation",
-                             spapr_get_msix_emulation, NULL, NULL);
+                             spapr_get_msix_emulation, NULL);
 
-    object_property_add(obj, "kernel-addr", "uint64", spapr_get_kernel_addr,
-                        spapr_set_kernel_addr, NULL, &spapr->kernel_addr,
-                        &error_abort);
+    object_property_add_uint64_ptr(obj, "kernel-addr",
+                                   &spapr->kernel_addr, OBJ_PROP_FLAG_READWRITE);
     object_property_set_description(obj, "kernel-addr",
                                     stringify(KERNEL_LOAD_ADDR)
-                                    " for -kernel is the default",
-                                    NULL);
+                                    " for -kernel is the default");
     spapr->kernel_addr = KERNEL_LOAD_ADDR;
     /* The machine class defines the default interrupt controller mode */
     spapr->irq = smc->irq;
     object_property_add_str(obj, "ic-mode", spapr_get_ic_mode,
-                            spapr_set_ic_mode, NULL);
+                            spapr_set_ic_mode);
     object_property_set_description(obj, "ic-mode",
-                 "Specifies the interrupt controller mode (xics, xive, dual)",
-                 NULL);
+                 "Specifies the interrupt controller mode (xics, xive, dual)");
 
     object_property_add_str(obj, "host-model",
-        spapr_get_host_model, spapr_set_host_model,
-        &error_abort);
+        spapr_get_host_model, spapr_set_host_model);
     object_property_set_description(obj, "host-model",
-        "Host model to advertise in guest device tree", &error_abort);
+        "Host model to advertise in guest device tree");
     object_property_add_str(obj, "host-serial",
-        spapr_get_host_serial, spapr_set_host_serial,
-        &error_abort);
+        spapr_get_host_serial, spapr_set_host_serial);
     object_property_set_description(obj, "host-serial",
-        "Host serial number to advertise in guest device tree", &error_abort);
+        "Host serial number to advertise in guest device tree");
 }
 
 static void spapr_machine_finalizefn(Object *obj)
@@ -3389,8 +3364,31 @@ static void spapr_machine_finalizefn(Object *obj)
 
 void spapr_do_system_reset_on_cpu(CPUState *cs, run_on_cpu_data arg)
 {
+    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    CPUPPCState *env = &cpu->env;
+
     cpu_synchronize_state(cs);
+    /* If FWNMI is inactive, addr will be -1, which will deliver to 0x100 */
+    if (spapr->fwnmi_system_reset_addr != -1) {
+        uint64_t rtas_addr, addr;
+
+        /* get rtas addr from fdt */
+        rtas_addr = spapr_get_rtas_addr();
+        if (!rtas_addr) {
+            qemu_system_guest_panicked(NULL);
+            return;
+        }
+
+        addr = rtas_addr + RTAS_ERROR_LOG_MAX + cs->cpu_index * sizeof(uint64_t)*2;
+        stq_be_phys(&address_space_memory, addr, env->gpr[3]);
+        stq_be_phys(&address_space_memory, addr + sizeof(uint64_t), 0);
+        env->gpr[3] = addr;
+    }
     ppc_cpu_do_system_reset(cs);
+    if (spapr->fwnmi_system_reset_addr != -1) {
+        env->nip = spapr->fwnmi_system_reset_addr;
+    }
 }
 
 static void spapr_nmi(NMIState *n, int cpu_index, Error **errp)
@@ -3411,8 +3409,8 @@ int spapr_lmb_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr,
     addr = spapr_drc_index(drc) * SPAPR_MEMORY_BLOCK_SIZE;
     node = object_property_get_uint(OBJECT(drc->dev), PC_DIMM_NODE_PROP,
                                     &error_abort);
-    *fdt_start_offset = spapr_populate_memory_node(fdt, node, addr,
-                                                   SPAPR_MEMORY_BLOCK_SIZE);
+    *fdt_start_offset = spapr_dt_memory_node(fdt, node, addr,
+                                             SPAPR_MEMORY_BLOCK_SIZE);
     return 0;
 }
 
@@ -3673,7 +3671,7 @@ static void spapr_memory_unplug(HotplugHandler *hotplug_dev, DeviceState *dev)
     SpaprDimmState *ds = spapr_pending_dimm_unplugs_find(spapr, PC_DIMM(dev));
 
     pc_dimm_unplug(PC_DIMM(dev), MACHINE(hotplug_dev));
-    object_property_set_bool(OBJECT(dev), false, "realized", NULL);
+    qdev_unrealize(dev);
     spapr_pending_dimm_unplugs_remove(spapr, ds);
 }
 
@@ -3766,7 +3764,7 @@ static void spapr_core_unplug(HotplugHandler *hotplug_dev, DeviceState *dev)
 
     assert(core_slot);
     core_slot->cpu = NULL;
-    object_property_set_bool(OBJECT(dev), false, "realized", NULL);
+    qdev_unrealize(dev);
 }
 
 static
@@ -3813,7 +3811,7 @@ int spapr_core_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr,
     offset = fdt_add_subnode(fdt, 0, nodename);
     g_free(nodename);
 
-    spapr_populate_cpu_dt(cs, fdt, offset, spapr);
+    spapr_dt_cpu(cs, fdt, offset, spapr);
 
     *fdt_start_offset = offset;
     return 0;
@@ -4016,7 +4014,7 @@ static void spapr_phb_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
     /* hotplug hooks should check it's enabled before getting this far */
     assert(drc);
 
-    spapr_drc_attach(drc, DEVICE(dev), &local_err);
+    spapr_drc_attach(drc, dev, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
         return;
@@ -4039,7 +4037,7 @@ void spapr_phb_release(DeviceState *dev)
 
 static void spapr_phb_unplug(HotplugHandler *hotplug_dev, DeviceState *dev)
 {
-    object_property_set_bool(OBJECT(dev), false, "realized", NULL);
+    qdev_unrealize(dev);
 }
 
 static void spapr_phb_unplug_request(HotplugHandler *hotplug_dev,
@@ -4075,7 +4073,7 @@ static void spapr_tpm_proxy_unplug(HotplugHandler *hotplug_dev, DeviceState *dev
 {
     SpaprMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev));
 
-    object_property_set_bool(OBJECT(dev), false, "realized", NULL);
+    qdev_unrealize(dev);
     object_unparent(OBJECT(dev));
     spapr->tpm_proxy = NULL;
 }
@@ -4526,8 +4524,8 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
     smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
     smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
     smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_ON;
-    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_ON;
-    spapr_caps_add_properties(smc, &error_abort);
+    smc->default_caps.caps[SPAPR_CAP_FWNMI] = SPAPR_CAP_ON;
+    spapr_caps_add_properties(smc);
     smc->irq = &spapr_irq_dual;
     smc->dr_phb_enabled = true;
     smc->linux_pci_probe = true;
@@ -4560,7 +4558,7 @@ static const TypeInfo spapr_machine_info = {
 static void spapr_machine_latest_class_options(MachineClass *mc)
 {
     mc->alias = "pseries";
-    mc->is_default = 1;
+    mc->is_default = true;
 }
 
 #define DEFINE_SPAPR_MACHINE(suffix, verstr, latest)                 \
@@ -4584,15 +4582,26 @@ static void spapr_machine_latest_class_options(MachineClass *mc)
     }                                                                \
     type_init(spapr_machine_register_##suffix)
 
+/*
+ * pseries-5.1
+ */
+static void spapr_machine_5_1_class_options(MachineClass *mc)
+{
+    /* Defaults for the latest behaviour inherited from the base class */
+}
+
+DEFINE_SPAPR_MACHINE(5_1, "5.1", true);
+
 /*
  * pseries-5.0
  */
 static void spapr_machine_5_0_class_options(MachineClass *mc)
 {
-    /* Defaults for the latest behaviour inherited from the base class */
+    spapr_machine_5_1_class_options(mc);
+    compat_props_add(mc->compat_props, hw_compat_5_0, hw_compat_5_0_len);
 }
 
-DEFINE_SPAPR_MACHINE(5_0, "5.0", true);
+DEFINE_SPAPR_MACHINE(5_0, "5.0", false);
 
 /*
  * pseries-4.2
@@ -4604,7 +4613,8 @@ static void spapr_machine_4_2_class_options(MachineClass *mc)
     spapr_machine_5_0_class_options(mc);
     compat_props_add(mc->compat_props, hw_compat_4_2, hw_compat_4_2_len);
     smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
-    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
+    smc->default_caps.caps[SPAPR_CAP_FWNMI] = SPAPR_CAP_OFF;
+    smc->rma_limit = 16 * GiB;
     mc->nvdimm_supported = false;
 }