/*
 * QEMU PowerPC pSeries Logical Partition NUMA associativity handling
 *
 * Copyright IBM Corp. 2020
 *
 * Authors:
 *  Daniel Henrique Barboza <danielhb413@gmail.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "qemu-common.h"
#include "hw/ppc/spapr_numa.h"
#include "hw/pci-host/spapr.h"
#include "hw/ppc/fdt.h"

/* Moved from hw/ppc/spapr_pci_nvlink2.c */
#define SPAPR_GPU_NUMA_ID (cpu_to_be32(1))
21 | |
22 | void spapr_numa_associativity_init(SpaprMachineState *spapr, | |
23 | MachineState *machine) | |
24 | { | |
dd7e1d7a | 25 | SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); |
f1aa45ff | 26 | int nb_numa_nodes = machine->numa_state->num_nodes; |
dd7e1d7a | 27 | int i, j, max_nodes_with_gpus; |
f1aa45ff DHB |
28 | |
29 | /* | |
30 | * For all associativity arrays: first position is the size, | |
31 | * position MAX_DISTANCE_REF_POINTS is always the numa_id, | |
32 | * represented by the index 'i'. | |
33 | * | |
34 | * This will break on sparse NUMA setups, when/if QEMU starts | |
35 | * to support it, because there will be no more guarantee that | |
36 | * 'i' will be a valid node_id set by the user. | |
37 | */ | |
38 | for (i = 0; i < nb_numa_nodes; i++) { | |
39 | spapr->numa_assoc_array[i][0] = cpu_to_be32(MAX_DISTANCE_REF_POINTS); | |
40 | spapr->numa_assoc_array[i][MAX_DISTANCE_REF_POINTS] = cpu_to_be32(i); | |
41 | } | |
dd7e1d7a DHB |
42 | |
43 | /* | |
44 | * Initialize NVLink GPU associativity arrays. We know that | |
45 | * the first GPU will take the first available NUMA id, and | |
46 | * we'll have a maximum of NVGPU_MAX_NUM GPUs in the machine. | |
47 | * At this point we're not sure if there are GPUs or not, but | |
48 | * let's initialize the associativity arrays and allow NVLink | |
49 | * GPUs to be handled like regular NUMA nodes later on. | |
50 | */ | |
51 | max_nodes_with_gpus = nb_numa_nodes + NVGPU_MAX_NUM; | |
52 | ||
53 | for (i = nb_numa_nodes; i < max_nodes_with_gpus; i++) { | |
54 | spapr->numa_assoc_array[i][0] = cpu_to_be32(MAX_DISTANCE_REF_POINTS); | |
55 | ||
56 | for (j = 1; j < MAX_DISTANCE_REF_POINTS; j++) { | |
57 | uint32_t gpu_assoc = smc->pre_5_1_assoc_refpoints ? | |
58 | SPAPR_GPU_NUMA_ID : cpu_to_be32(i); | |
59 | spapr->numa_assoc_array[i][j] = gpu_assoc; | |
60 | } | |
61 | ||
62 | spapr->numa_assoc_array[i][MAX_DISTANCE_REF_POINTS] = cpu_to_be32(i); | |
63 | } | |
f1aa45ff DHB |
64 | } |
65 | ||
66 | void spapr_numa_write_associativity_dt(SpaprMachineState *spapr, void *fdt, | |
67 | int offset, int nodeid) | |
68 | { | |
69 | _FDT((fdt_setprop(fdt, offset, "ibm,associativity", | |
70 | spapr->numa_assoc_array[nodeid], | |
71 | sizeof(spapr->numa_assoc_array[nodeid])))); | |
8f86a408 DHB |
72 | } |
73 | ||
d370f9cf DHB |
74 | static uint32_t *spapr_numa_get_vcpu_assoc(SpaprMachineState *spapr, |
75 | PowerPCCPU *cpu) | |
8f86a408 | 76 | { |
d370f9cf | 77 | uint32_t *vcpu_assoc = g_new(uint32_t, VCPU_ASSOC_SIZE); |
8f86a408 | 78 | int index = spapr_get_vcpu_id(cpu); |
8f86a408 DHB |
79 | |
80 | /* | |
81 | * VCPUs have an extra 'cpu_id' value in ibm,associativity | |
82 | * compared to other resources. Increment the size at index | |
d370f9cf DHB |
83 | * 0, put cpu_id last, then copy the remaining associativity |
84 | * domains. | |
8f86a408 DHB |
85 | */ |
86 | vcpu_assoc[0] = cpu_to_be32(MAX_DISTANCE_REF_POINTS + 1); | |
d370f9cf DHB |
87 | vcpu_assoc[VCPU_ASSOC_SIZE - 1] = cpu_to_be32(index); |
88 | memcpy(vcpu_assoc + 1, spapr->numa_assoc_array[cpu->node_id] + 1, | |
89 | (VCPU_ASSOC_SIZE - 2) * sizeof(uint32_t)); | |
8f86a408 | 90 | |
d370f9cf DHB |
91 | return vcpu_assoc; |
92 | } | |
93 | ||
94 | int spapr_numa_fixup_cpu_dt(SpaprMachineState *spapr, void *fdt, | |
95 | int offset, PowerPCCPU *cpu) | |
96 | { | |
97 | g_autofree uint32_t *vcpu_assoc = NULL; | |
8f86a408 | 98 | |
d370f9cf | 99 | vcpu_assoc = spapr_numa_get_vcpu_assoc(spapr, cpu); |
8f86a408 DHB |
100 | |
101 | /* Advertise NUMA via ibm,associativity */ | |
d370f9cf DHB |
102 | return fdt_setprop(fdt, offset, "ibm,associativity", vcpu_assoc, |
103 | VCPU_ASSOC_SIZE * sizeof(uint32_t)); | |
f1aa45ff DHB |
104 | } |
105 | ||
0ee52012 DHB |
106 | |
107 | int spapr_numa_write_assoc_lookup_arrays(SpaprMachineState *spapr, void *fdt, | |
108 | int offset) | |
109 | { | |
110 | MachineState *machine = MACHINE(spapr); | |
111 | int nb_numa_nodes = machine->numa_state->num_nodes; | |
112 | int nr_nodes = nb_numa_nodes ? nb_numa_nodes : 1; | |
113 | uint32_t *int_buf, *cur_index, buf_len; | |
114 | int ret, i; | |
115 | ||
116 | /* ibm,associativity-lookup-arrays */ | |
117 | buf_len = (nr_nodes * MAX_DISTANCE_REF_POINTS + 2) * sizeof(uint32_t); | |
118 | cur_index = int_buf = g_malloc0(buf_len); | |
119 | int_buf[0] = cpu_to_be32(nr_nodes); | |
120 | /* Number of entries per associativity list */ | |
121 | int_buf[1] = cpu_to_be32(MAX_DISTANCE_REF_POINTS); | |
122 | cur_index += 2; | |
123 | for (i = 0; i < nr_nodes; i++) { | |
124 | /* | |
125 | * For the lookup-array we use the ibm,associativity array, | |
126 | * from numa_assoc_array. without the first element (size). | |
127 | */ | |
128 | uint32_t *associativity = spapr->numa_assoc_array[i]; | |
129 | memcpy(cur_index, ++associativity, | |
130 | sizeof(uint32_t) * MAX_DISTANCE_REF_POINTS); | |
131 | cur_index += MAX_DISTANCE_REF_POINTS; | |
132 | } | |
133 | ret = fdt_setprop(fdt, offset, "ibm,associativity-lookup-arrays", int_buf, | |
134 | (cur_index - int_buf) * sizeof(uint32_t)); | |
135 | g_free(int_buf); | |
136 | ||
137 | return ret; | |
138 | } | |
139 | ||
1eee9950 DHB |
140 | /* |
141 | * Helper that writes ibm,associativity-reference-points and | |
142 | * max-associativity-domains in the RTAS pointed by @rtas | |
143 | * in the DT @fdt. | |
144 | */ | |
145 | void spapr_numa_write_rtas_dt(SpaprMachineState *spapr, void *fdt, int rtas) | |
146 | { | |
147 | SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); | |
148 | uint32_t refpoints[] = { | |
149 | cpu_to_be32(0x4), | |
150 | cpu_to_be32(0x4), | |
151 | cpu_to_be32(0x2), | |
152 | }; | |
153 | uint32_t nr_refpoints = ARRAY_SIZE(refpoints); | |
154 | uint32_t maxdomain = cpu_to_be32(spapr->gpu_numa_id > 1 ? 1 : 0); | |
155 | uint32_t maxdomains[] = { | |
156 | cpu_to_be32(4), | |
157 | maxdomain, | |
158 | maxdomain, | |
159 | maxdomain, | |
160 | cpu_to_be32(spapr->gpu_numa_id), | |
161 | }; | |
162 | ||
163 | if (smc->pre_5_1_assoc_refpoints) { | |
164 | nr_refpoints = 2; | |
165 | } | |
166 | ||
167 | _FDT(fdt_setprop(fdt, rtas, "ibm,associativity-reference-points", | |
168 | refpoints, nr_refpoints * sizeof(refpoints[0]))); | |
169 | ||
170 | _FDT(fdt_setprop(fdt, rtas, "ibm,max-associativity-domains", | |
171 | maxdomains, sizeof(maxdomains))); | |
172 | } | |
f8a13fc3 DHB |
173 | |
174 | static target_ulong h_home_node_associativity(PowerPCCPU *cpu, | |
175 | SpaprMachineState *spapr, | |
176 | target_ulong opcode, | |
177 | target_ulong *args) | |
178 | { | |
876ab8d8 | 179 | g_autofree uint32_t *vcpu_assoc = NULL; |
f8a13fc3 DHB |
180 | target_ulong flags = args[0]; |
181 | target_ulong procno = args[1]; | |
182 | PowerPCCPU *tcpu; | |
876ab8d8 | 183 | int idx, assoc_idx; |
f8a13fc3 DHB |
184 | |
185 | /* only support procno from H_REGISTER_VPA */ | |
186 | if (flags != 0x1) { | |
187 | return H_FUNCTION; | |
188 | } | |
189 | ||
190 | tcpu = spapr_find_cpu(procno); | |
191 | if (tcpu == NULL) { | |
192 | return H_P2; | |
193 | } | |
194 | ||
876ab8d8 DHB |
195 | /* |
196 | * Given that we want to be flexible with the sizes and indexes, | |
197 | * we must consider that there is a hard limit of how many | |
198 | * associativities domain we can fit in R4 up to R9, which would be | |
199 | * 12 associativity domains for vcpus. Assert and bail if that's | |
200 | * not the case. | |
201 | */ | |
202 | G_STATIC_ASSERT((VCPU_ASSOC_SIZE - 1) <= 12); | |
203 | ||
204 | vcpu_assoc = spapr_numa_get_vcpu_assoc(spapr, tcpu); | |
205 | /* assoc_idx starts at 1 to skip associativity size */ | |
206 | assoc_idx = 1; | |
f8a13fc3 | 207 | |
f8a13fc3 DHB |
208 | #define ASSOCIATIVITY(a, b) (((uint64_t)(a) << 32) | \ |
209 | ((uint64_t)(b) & 0xffffffff)) | |
876ab8d8 DHB |
210 | |
211 | for (idx = 0; idx < 6; idx++) { | |
212 | int32_t a, b; | |
213 | ||
214 | /* | |
215 | * vcpu_assoc[] will contain the associativity domains for tcpu, | |
216 | * including tcpu->node_id and procno, meaning that we don't | |
217 | * need to use these variables here. | |
218 | * | |
219 | * We'll read 2 values at a time to fill up the ASSOCIATIVITY() | |
220 | * macro. The ternary will fill the remaining registers with -1 | |
221 | * after we went through vcpu_assoc[]. | |
222 | */ | |
223 | a = assoc_idx < VCPU_ASSOC_SIZE ? | |
224 | be32_to_cpu(vcpu_assoc[assoc_idx++]) : -1; | |
225 | b = assoc_idx < VCPU_ASSOC_SIZE ? | |
226 | be32_to_cpu(vcpu_assoc[assoc_idx++]) : -1; | |
227 | ||
228 | args[idx] = ASSOCIATIVITY(a, b); | |
f8a13fc3 DHB |
229 | } |
230 | #undef ASSOCIATIVITY | |
231 | ||
232 | return H_SUCCESS; | |
233 | } | |
234 | ||
/* Register the Virtual Processor Home Node hypercall handler. */
static void spapr_numa_register_types(void)
{
    /* Virtual Processor Home Node */
    spapr_register_hypercall(H_HOME_NODE_ASSOCIATIVITY,
                             h_home_node_associativity);
}

type_init(spapr_numa_register_types)