[mirror_qemu.git] / hw / ppc / spapr_numa.c

/*
 * QEMU PowerPC pSeries Logical Partition NUMA associativity handling
 *
 * Copyright IBM Corp. 2020
 *
 * Authors:
 *  Daniel Henrique Barboza      <danielhb413@gmail.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "qemu-common.h"
#include "hw/ppc/spapr_numa.h"
#include "hw/pci-host/spapr.h"
#include "hw/ppc/fdt.h"

/* Moved from hw/ppc/spapr_pci_nvlink2.c */
#define SPAPR_GPU_NUMA_ID           (cpu_to_be32(1))

void spapr_numa_associativity_init(SpaprMachineState *spapr,
                                   MachineState *machine)
{
    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
    int nb_numa_nodes = machine->numa_state->num_nodes;
    int i, j, max_nodes_with_gpus;

    /*
     * For all associativity arrays: first position is the size,
     * position MAX_DISTANCE_REF_POINTS is always the numa_id,
     * represented by the index 'i'.
     *
     * This will break on sparse NUMA setups, when/if QEMU starts
     * to support it, because there will be no more guarantee that
     * 'i' will be a valid node_id set by the user.
     */
    for (i = 0; i < nb_numa_nodes; i++) {
        spapr->numa_assoc_array[i][0] = cpu_to_be32(MAX_DISTANCE_REF_POINTS);
        spapr->numa_assoc_array[i][MAX_DISTANCE_REF_POINTS] = cpu_to_be32(i);
    }

    /*
     * Initialize NVLink GPU associativity arrays. We know that
     * the first GPU will take the first available NUMA id, and
     * we'll have a maximum of NVGPU_MAX_NUM GPUs in the machine.
     * At this point we're not sure if there are GPUs or not, but
     * let's initialize the associativity arrays and allow NVLink
     * GPUs to be handled like regular NUMA nodes later on.
     */
    max_nodes_with_gpus = nb_numa_nodes + NVGPU_MAX_NUM;

    for (i = nb_numa_nodes; i < max_nodes_with_gpus; i++) {
        spapr->numa_assoc_array[i][0] = cpu_to_be32(MAX_DISTANCE_REF_POINTS);

        for (j = 1; j < MAX_DISTANCE_REF_POINTS; j++) {
            uint32_t gpu_assoc = smc->pre_5_1_assoc_refpoints ?
                                 SPAPR_GPU_NUMA_ID : cpu_to_be32(i);
            spapr->numa_assoc_array[i][j] = gpu_assoc;
        }

        spapr->numa_assoc_array[i][MAX_DISTANCE_REF_POINTS] = cpu_to_be32(i);
    }
}

void spapr_numa_write_associativity_dt(SpaprMachineState *spapr, void *fdt,
                                       int offset, int nodeid)
{
    _FDT((fdt_setprop(fdt, offset, "ibm,associativity",
                      spapr->numa_assoc_array[nodeid],
                      sizeof(spapr->numa_assoc_array[nodeid]))));
}

static uint32_t *spapr_numa_get_vcpu_assoc(SpaprMachineState *spapr,
                                           PowerPCCPU *cpu)
{
    uint32_t *vcpu_assoc = g_new(uint32_t, VCPU_ASSOC_SIZE);
    int index = spapr_get_vcpu_id(cpu);

    /*
     * VCPUs have an extra 'cpu_id' value in ibm,associativity
     * compared to other resources. Increment the size at index
     * 0, put cpu_id last, then copy the remaining associativity
     * domains.
     */
    vcpu_assoc[0] = cpu_to_be32(MAX_DISTANCE_REF_POINTS + 1);
    vcpu_assoc[VCPU_ASSOC_SIZE - 1] = cpu_to_be32(index);
    memcpy(vcpu_assoc + 1, spapr->numa_assoc_array[cpu->node_id] + 1,
           (VCPU_ASSOC_SIZE - 2) * sizeof(uint32_t));

    return vcpu_assoc;
}

int spapr_numa_fixup_cpu_dt(SpaprMachineState *spapr, void *fdt,
                            int offset, PowerPCCPU *cpu)
{
    g_autofree uint32_t *vcpu_assoc = NULL;

    vcpu_assoc = spapr_numa_get_vcpu_assoc(spapr, cpu);

    /* Advertise NUMA via ibm,associativity */
    return fdt_setprop(fdt, offset, "ibm,associativity", vcpu_assoc,
                       VCPU_ASSOC_SIZE * sizeof(uint32_t));
}


int spapr_numa_write_assoc_lookup_arrays(SpaprMachineState *spapr, void *fdt,
                                         int offset)
{
    MachineState *machine = MACHINE(spapr);
    int nb_numa_nodes = machine->numa_state->num_nodes;
    int nr_nodes = nb_numa_nodes ? nb_numa_nodes : 1;
    uint32_t *int_buf, *cur_index, buf_len;
    int ret, i;

    /* ibm,associativity-lookup-arrays */
    buf_len = (nr_nodes * MAX_DISTANCE_REF_POINTS + 2) * sizeof(uint32_t);
    cur_index = int_buf = g_malloc0(buf_len);
    int_buf[0] = cpu_to_be32(nr_nodes);
     /* Number of entries per associativity list */
    int_buf[1] = cpu_to_be32(MAX_DISTANCE_REF_POINTS);
    cur_index += 2;
    for (i = 0; i < nr_nodes; i++) {
        /*
         * For the lookup-array we use the ibm,associativity array,
         * from numa_assoc_array. without the first element (size).
         */
        uint32_t *associativity = spapr->numa_assoc_array[i];
        memcpy(cur_index, ++associativity,
               sizeof(uint32_t) * MAX_DISTANCE_REF_POINTS);
        cur_index += MAX_DISTANCE_REF_POINTS;
    }
    ret = fdt_setprop(fdt, offset, "ibm,associativity-lookup-arrays", int_buf,
                      (cur_index - int_buf) * sizeof(uint32_t));
    g_free(int_buf);

    return ret;
}

/*
 * Helper that writes ibm,associativity-reference-points and
 * max-associativity-domains in the RTAS pointed by @rtas
 * in the DT @fdt.
 */
void spapr_numa_write_rtas_dt(SpaprMachineState *spapr, void *fdt, int rtas)
{
    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
    uint32_t refpoints[] = {
        cpu_to_be32(0x4),
        cpu_to_be32(0x4),
        cpu_to_be32(0x2),
    };
    uint32_t nr_refpoints = ARRAY_SIZE(refpoints);
    uint32_t maxdomain = cpu_to_be32(spapr->gpu_numa_id > 1 ? 1 : 0);
    uint32_t maxdomains[] = {
        cpu_to_be32(4),
        maxdomain,
        maxdomain,
        maxdomain,
        cpu_to_be32(spapr->gpu_numa_id),
    };

    if (smc->pre_5_1_assoc_refpoints) {
        nr_refpoints = 2;
    }

    _FDT(fdt_setprop(fdt, rtas, "ibm,associativity-reference-points",
                     refpoints, nr_refpoints * sizeof(refpoints[0])));

    _FDT(fdt_setprop(fdt, rtas, "ibm,max-associativity-domains",
                     maxdomains, sizeof(maxdomains)));
}

static target_ulong h_home_node_associativity(PowerPCCPU *cpu,
                                              SpaprMachineState *spapr,
                                              target_ulong opcode,
                                              target_ulong *args)
{
    g_autofree uint32_t *vcpu_assoc = NULL;
    target_ulong flags = args[0];
    target_ulong procno = args[1];
    PowerPCCPU *tcpu;
    int idx, assoc_idx;

    /* only support procno from H_REGISTER_VPA */
    if (flags != 0x1) {
        return H_FUNCTION;
    }

    tcpu = spapr_find_cpu(procno);
    if (tcpu == NULL) {
        return H_P2;
    }

    /*
     * Given that we want to be flexible with the sizes and indexes,
     * we must consider that there is a hard limit of how many
     * associativities domain we can fit in R4 up to R9, which would be
     * 12 associativity domains for vcpus. Assert and bail if that's
     * not the case.
     */
    G_STATIC_ASSERT((VCPU_ASSOC_SIZE - 1) <= 12);

    vcpu_assoc = spapr_numa_get_vcpu_assoc(spapr, tcpu);
    /* assoc_idx starts at 1 to skip associativity size */
    assoc_idx = 1;

#define ASSOCIATIVITY(a, b) (((uint64_t)(a) << 32) | \
                             ((uint64_t)(b) & 0xffffffff))

    for (idx = 0; idx < 6; idx++) {
        int32_t a, b;

        /*
         * vcpu_assoc[] will contain the associativity domains for tcpu,
         * including tcpu->node_id and procno, meaning that we don't
         * need to use these variables here.
         *
         * We'll read 2 values at a time to fill up the ASSOCIATIVITY()
         * macro. The ternary will fill the remaining registers with -1
         * after we went through vcpu_assoc[].
         */
        a = assoc_idx < VCPU_ASSOC_SIZE ?
            be32_to_cpu(vcpu_assoc[assoc_idx++]) : -1;
        b = assoc_idx < VCPU_ASSOC_SIZE ?
            be32_to_cpu(vcpu_assoc[assoc_idx++]) : -1;

        args[idx] = ASSOCIATIVITY(a, b);
    }
#undef ASSOCIATIVITY

    return H_SUCCESS;
}

static void spapr_numa_register_types(void)
{
    /* Virtual Processor Home Node */
    spapr_register_hypercall(H_HOME_NODE_ASSOCIATIVITY,
                             h_home_node_associativity);
}

type_init(spapr_numa_register_types)
Commit	Line	Data
1eee9950 DHB	1	/*
	2	* QEMU PowerPC pSeries Logical Partition NUMA associativity handling
	3	*
	4	* Copyright IBM Corp. 2020
	5	*
	6	* Authors:
	7	* Daniel Henrique Barboza <danielhb413@gmail.com>
	8	*
	9	* This work is licensed under the terms of the GNU GPL, version 2 or later.
	10	* See the COPYING file in the top-level directory.
	11	*/
	12
	13	#include "qemu/osdep.h"
	14	#include "qemu-common.h"
	15	#include "hw/ppc/spapr_numa.h"
dd7e1d7a	16	#include "hw/pci-host/spapr.h"
1eee9950 DHB	17	#include "hw/ppc/fdt.h"
1eee9950 DHB	18
dd7e1d7a DHB	19	/* Moved from hw/ppc/spapr_pci_nvlink2.c */
dd7e1d7a DHB	20	#define SPAPR_GPU_NUMA_ID (cpu_to_be32(1))
f1aa45ff DHB	21
	22	void spapr_numa_associativity_init(SpaprMachineState *spapr,
	23	MachineState *machine)
	24	{
dd7e1d7a	25	SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
f1aa45ff	26	int nb_numa_nodes = machine->numa_state->num_nodes;
dd7e1d7a	27	int i, j, max_nodes_with_gpus;
f1aa45ff DHB	28
	29	/*
	30	* For all associativity arrays: first position is the size,
	31	* position MAX_DISTANCE_REF_POINTS is always the numa_id,
	32	* represented by the index 'i'.
	33	*
	34	* This will break on sparse NUMA setups, when/if QEMU starts
	35	* to support it, because there will be no more guarantee that
	36	* 'i' will be a valid node_id set by the user.
	37	*/
	38	for (i = 0; i < nb_numa_nodes; i++) {
	39	spapr->numa_assoc_array[i][0] = cpu_to_be32(MAX_DISTANCE_REF_POINTS);
	40	spapr->numa_assoc_array[i][MAX_DISTANCE_REF_POINTS] = cpu_to_be32(i);
	41	}
dd7e1d7a DHB	42
	43	/*
	44	* Initialize NVLink GPU associativity arrays. We know that
	45	* the first GPU will take the first available NUMA id, and
	46	* we'll have a maximum of NVGPU_MAX_NUM GPUs in the machine.
	47	* At this point we're not sure if there are GPUs or not, but
	48	* let's initialize the associativity arrays and allow NVLink
	49	* GPUs to be handled like regular NUMA nodes later on.
	50	*/
	51	max_nodes_with_gpus = nb_numa_nodes + NVGPU_MAX_NUM;
	52
	53	for (i = nb_numa_nodes; i < max_nodes_with_gpus; i++) {
	54	spapr->numa_assoc_array[i][0] = cpu_to_be32(MAX_DISTANCE_REF_POINTS);
	55
	56	for (j = 1; j < MAX_DISTANCE_REF_POINTS; j++) {
	57	uint32_t gpu_assoc = smc->pre_5_1_assoc_refpoints ?
	58	SPAPR_GPU_NUMA_ID : cpu_to_be32(i);
	59	spapr->numa_assoc_array[i][j] = gpu_assoc;
	60	}
	61
	62	spapr->numa_assoc_array[i][MAX_DISTANCE_REF_POINTS] = cpu_to_be32(i);
	63	}
f1aa45ff DHB	64	}
	65
	66	void spapr_numa_write_associativity_dt(SpaprMachineState spapr, void fdt,
	67	int offset, int nodeid)
	68	{
	69	_FDT((fdt_setprop(fdt, offset, "ibm,associativity",
	70	spapr->numa_assoc_array[nodeid],
	71	sizeof(spapr->numa_assoc_array[nodeid]))));
8f86a408 DHB	72	}
8f86a408 DHB	73
d370f9cf DHB	74	static uint32_t spapr_numa_get_vcpu_assoc(SpaprMachineState spapr,
d370f9cf DHB	75	PowerPCCPU *cpu)
8f86a408	76	{
d370f9cf	77	uint32_t *vcpu_assoc = g_new(uint32_t, VCPU_ASSOC_SIZE);
8f86a408	78	int index = spapr_get_vcpu_id(cpu);
8f86a408 DHB	79
	80	/*
	81	* VCPUs have an extra 'cpu_id' value in ibm,associativity
	82	* compared to other resources. Increment the size at index
d370f9cf DHB	83	* 0, put cpu_id last, then copy the remaining associativity
d370f9cf DHB	84	* domains.
8f86a408 DHB	85	*/
8f86a408 DHB	86	vcpu_assoc[0] = cpu_to_be32(MAX_DISTANCE_REF_POINTS + 1);
d370f9cf DHB	87	vcpu_assoc[VCPU_ASSOC_SIZE - 1] = cpu_to_be32(index);
	88	memcpy(vcpu_assoc + 1, spapr->numa_assoc_array[cpu->node_id] + 1,
	89	(VCPU_ASSOC_SIZE - 2) * sizeof(uint32_t));
8f86a408	90
d370f9cf DHB	91	return vcpu_assoc;
	92	}
	93
	94	int spapr_numa_fixup_cpu_dt(SpaprMachineState spapr, void fdt,
	95	int offset, PowerPCCPU *cpu)
	96	{
	97	g_autofree uint32_t *vcpu_assoc = NULL;
8f86a408	98
d370f9cf	99	vcpu_assoc = spapr_numa_get_vcpu_assoc(spapr, cpu);
8f86a408 DHB	100
8f86a408 DHB	101	/* Advertise NUMA via ibm,associativity */
d370f9cf DHB	102	return fdt_setprop(fdt, offset, "ibm,associativity", vcpu_assoc,
d370f9cf DHB	103	VCPU_ASSOC_SIZE * sizeof(uint32_t));
f1aa45ff DHB	104	}
f1aa45ff DHB	105
0ee52012 DHB	106
	107	int spapr_numa_write_assoc_lookup_arrays(SpaprMachineState spapr, void fdt,
	108	int offset)
	109	{
	110	MachineState *machine = MACHINE(spapr);
	111	int nb_numa_nodes = machine->numa_state->num_nodes;
	112	int nr_nodes = nb_numa_nodes ? nb_numa_nodes : 1;
	113	uint32_t int_buf, cur_index, buf_len;
	114	int ret, i;
	115
	116	/* ibm,associativity-lookup-arrays */
	117	buf_len = (nr_nodes * MAX_DISTANCE_REF_POINTS + 2) * sizeof(uint32_t);
	118	cur_index = int_buf = g_malloc0(buf_len);
	119	int_buf[0] = cpu_to_be32(nr_nodes);
	120	/* Number of entries per associativity list */
	121	int_buf[1] = cpu_to_be32(MAX_DISTANCE_REF_POINTS);
	122	cur_index += 2;
	123	for (i = 0; i < nr_nodes; i++) {
	124	/*
	125	* For the lookup-array we use the ibm,associativity array,
	126	* from numa_assoc_array. without the first element (size).
	127	*/
	128	uint32_t *associativity = spapr->numa_assoc_array[i];
	129	memcpy(cur_index, ++associativity,
	130	sizeof(uint32_t) * MAX_DISTANCE_REF_POINTS);
	131	cur_index += MAX_DISTANCE_REF_POINTS;
	132	}
	133	ret = fdt_setprop(fdt, offset, "ibm,associativity-lookup-arrays", int_buf,
	134	(cur_index - int_buf) * sizeof(uint32_t));
	135	g_free(int_buf);
	136
	137	return ret;
	138	}
	139
1eee9950 DHB	140	/*
	141	* Helper that writes ibm,associativity-reference-points and
	142	* max-associativity-domains in the RTAS pointed by @rtas
	143	* in the DT @fdt.
	144	*/
	145	void spapr_numa_write_rtas_dt(SpaprMachineState spapr, void fdt, int rtas)
	146	{
	147	SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
	148	uint32_t refpoints[] = {
	149	cpu_to_be32(0x4),
	150	cpu_to_be32(0x4),
	151	cpu_to_be32(0x2),
	152	};
	153	uint32_t nr_refpoints = ARRAY_SIZE(refpoints);
	154	uint32_t maxdomain = cpu_to_be32(spapr->gpu_numa_id > 1 ? 1 : 0);
	155	uint32_t maxdomains[] = {
	156	cpu_to_be32(4),
	157	maxdomain,
	158	maxdomain,
	159	maxdomain,
	160	cpu_to_be32(spapr->gpu_numa_id),
	161	};
	162
	163	if (smc->pre_5_1_assoc_refpoints) {
	164	nr_refpoints = 2;
	165	}
	166
	167	_FDT(fdt_setprop(fdt, rtas, "ibm,associativity-reference-points",
	168	refpoints, nr_refpoints * sizeof(refpoints[0])));
	169
	170	_FDT(fdt_setprop(fdt, rtas, "ibm,max-associativity-domains",
	171	maxdomains, sizeof(maxdomains)));
	172	}
f8a13fc3 DHB	173
	174	static target_ulong h_home_node_associativity(PowerPCCPU *cpu,
	175	SpaprMachineState *spapr,
	176	target_ulong opcode,
	177	target_ulong *args)
	178	{
876ab8d8	179	g_autofree uint32_t *vcpu_assoc = NULL;
f8a13fc3 DHB	180	target_ulong flags = args[0];
	181	target_ulong procno = args[1];
	182	PowerPCCPU *tcpu;
876ab8d8	183	int idx, assoc_idx;
f8a13fc3 DHB	184
	185	/* only support procno from H_REGISTER_VPA */
	186	if (flags != 0x1) {
	187	return H_FUNCTION;
	188	}
	189
	190	tcpu = spapr_find_cpu(procno);
	191	if (tcpu == NULL) {
	192	return H_P2;
	193	}
	194
876ab8d8 DHB	195	/*
	196	* Given that we want to be flexible with the sizes and indexes,
	197	* we must consider that there is a hard limit of how many
	198	* associativities domain we can fit in R4 up to R9, which would be
	199	* 12 associativity domains for vcpus. Assert and bail if that's
	200	* not the case.
	201	*/
	202	G_STATIC_ASSERT((VCPU_ASSOC_SIZE - 1) <= 12);
	203
	204	vcpu_assoc = spapr_numa_get_vcpu_assoc(spapr, tcpu);
	205	/* assoc_idx starts at 1 to skip associativity size */
	206	assoc_idx = 1;
f8a13fc3	207
f8a13fc3 DHB	208	#define ASSOCIATIVITY(a, b) (((uint64_t)(a) << 32) \| \
f8a13fc3 DHB	209	((uint64_t)(b) & 0xffffffff))
876ab8d8 DHB	210
	211	for (idx = 0; idx < 6; idx++) {
	212	int32_t a, b;
	213
	214	/*
	215	* vcpu_assoc[] will contain the associativity domains for tcpu,
	216	* including tcpu->node_id and procno, meaning that we don't
	217	* need to use these variables here.
	218	*
	219	* We'll read 2 values at a time to fill up the ASSOCIATIVITY()
	220	* macro. The ternary will fill the remaining registers with -1
	221	* after we went through vcpu_assoc[].
	222	*/
	223	a = assoc_idx < VCPU_ASSOC_SIZE ?
	224	be32_to_cpu(vcpu_assoc[assoc_idx++]) : -1;
	225	b = assoc_idx < VCPU_ASSOC_SIZE ?
	226	be32_to_cpu(vcpu_assoc[assoc_idx++]) : -1;
	227
	228	args[idx] = ASSOCIATIVITY(a, b);
f8a13fc3 DHB	229	}
	230	#undef ASSOCIATIVITY
	231
	232	return H_SUCCESS;
	233	}
	234
	235	static void spapr_numa_register_types(void)
	236	{
	237	/* Virtual Processor Home Node */
	238	spapr_register_hypercall(H_HOME_NODE_ASSOCIATIVITY,
	239	h_home_node_associativity);
	240	}
	241
	242	type_init(spapr_numa_register_types)