]>
Commit | Line | Data |
---|---|---|
38afd772 CLG |
1 | /* |
2 | * QEMU PowerPC sPAPR XIVE interrupt controller model | |
3 | * | |
4 | * Copyright (c) 2017-2019, IBM Corporation. | |
5 | * | |
6 | * This code is licensed under the GPL version 2 or later. See the | |
7 | * COPYING file in the top-level directory. | |
8 | */ | |
9 | ||
10 | #include "qemu/osdep.h" | |
11 | #include "qemu/log.h" | |
12 | #include "qemu/error-report.h" | |
13 | #include "qapi/error.h" | |
14 | #include "target/ppc/cpu.h" | |
15 | #include "sysemu/cpus.h" | |
16 | #include "sysemu/kvm.h" | |
54d31236 | 17 | #include "sysemu/runstate.h" |
38afd772 | 18 | #include "hw/ppc/spapr.h" |
277dd3d7 | 19 | #include "hw/ppc/spapr_cpu_core.h" |
38afd772 CLG |
20 | #include "hw/ppc/spapr_xive.h" |
21 | #include "hw/ppc/xive.h" | |
22 | #include "kvm_ppc.h" | |
4e960974 | 23 | #include "trace.h" |
38afd772 CLG |
24 | |
25 | #include <sys/ioctl.h> | |
26 | ||
27 | /* | |
28 | * Helpers for CPU hotplug | |
29 | * | |
30 | * TODO: make a common KVMEnabledCPU layer for XICS and XIVE | |
31 | */ | |
32 | typedef struct KVMEnabledCPU { | |
33 | unsigned long vcpu_id; | |
34 | QLIST_ENTRY(KVMEnabledCPU) node; | |
35 | } KVMEnabledCPU; | |
36 | ||
37 | static QLIST_HEAD(, KVMEnabledCPU) | |
38 | kvm_enabled_cpus = QLIST_HEAD_INITIALIZER(&kvm_enabled_cpus); | |
39 | ||
6d24795e | 40 | static bool kvm_cpu_is_enabled(CPUState *cs) |
38afd772 CLG |
41 | { |
42 | KVMEnabledCPU *enabled_cpu; | |
6d24795e | 43 | unsigned long vcpu_id = kvm_arch_vcpu_id(cs); |
38afd772 CLG |
44 | |
45 | QLIST_FOREACH(enabled_cpu, &kvm_enabled_cpus, node) { | |
46 | if (enabled_cpu->vcpu_id == vcpu_id) { | |
47 | return true; | |
48 | } | |
49 | } | |
50 | return false; | |
51 | } | |
52 | ||
53 | static void kvm_cpu_enable(CPUState *cs) | |
54 | { | |
55 | KVMEnabledCPU *enabled_cpu; | |
56 | unsigned long vcpu_id = kvm_arch_vcpu_id(cs); | |
57 | ||
58 | enabled_cpu = g_malloc(sizeof(*enabled_cpu)); | |
59 | enabled_cpu->vcpu_id = vcpu_id; | |
60 | QLIST_INSERT_HEAD(&kvm_enabled_cpus, enabled_cpu, node); | |
61 | } | |
62 | ||
56b11587 CLG |
63 | static void kvm_cpu_disable_all(void) |
64 | { | |
65 | KVMEnabledCPU *enabled_cpu, *next; | |
66 | ||
67 | QLIST_FOREACH_SAFE(enabled_cpu, &kvm_enabled_cpus, node, next) { | |
68 | QLIST_REMOVE(enabled_cpu, node); | |
69 | g_free(enabled_cpu); | |
70 | } | |
71 | } | |
72 | ||
38afd772 CLG |
73 | /* |
74 | * XIVE Thread Interrupt Management context (KVM) | |
75 | */ | |
277dd3d7 | 76 | |
5fa36b7f | 77 | int kvmppc_xive_cpu_set_state(XiveTCTX *tctx, Error **errp) |
277dd3d7 | 78 | { |
74e51a38 | 79 | SpaprXive *xive = SPAPR_XIVE(tctx->xptr); |
277dd3d7 CLG |
80 | uint64_t state[2]; |
81 | int ret; | |
82 | ||
a4907119 | 83 | assert(xive->fd != -1); |
310cda5b | 84 | |
277dd3d7 CLG |
85 | /* word0 and word1 of the OS ring. */ |
86 | state[0] = *((uint64_t *) &tctx->regs[TM_QW1_OS]); | |
87 | ||
88 | ret = kvm_set_one_reg(tctx->cs, KVM_REG_PPC_VP_STATE, state); | |
89 | if (ret != 0) { | |
5fa36b7f | 90 | error_setg_errno(errp, -ret, |
277dd3d7 CLG |
91 | "XIVE: could not restore KVM state of CPU %ld", |
92 | kvm_arch_vcpu_id(tctx->cs)); | |
5fa36b7f | 93 | return ret; |
277dd3d7 | 94 | } |
5fa36b7f GK |
95 | |
96 | return 0; | |
277dd3d7 CLG |
97 | } |
98 | ||
5fa36b7f | 99 | int kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp) |
7bfc759c | 100 | { |
74e51a38 | 101 | SpaprXive *xive = SPAPR_XIVE(tctx->xptr); |
7bfc759c CLG |
102 | uint64_t state[2] = { 0 }; |
103 | int ret; | |
104 | ||
a4907119 | 105 | assert(xive->fd != -1); |
3bf84e99 | 106 | |
7bfc759c CLG |
107 | ret = kvm_get_one_reg(tctx->cs, KVM_REG_PPC_VP_STATE, state); |
108 | if (ret != 0) { | |
5fa36b7f | 109 | error_setg_errno(errp, -ret, |
7bfc759c CLG |
110 | "XIVE: could not capture KVM state of CPU %ld", |
111 | kvm_arch_vcpu_id(tctx->cs)); | |
5fa36b7f | 112 | return ret; |
7bfc759c CLG |
113 | } |
114 | ||
115 | /* word0 and word1 of the OS ring. */ | |
116 | *((uint64_t *) &tctx->regs[TM_QW1_OS]) = state[0]; | |
5fa36b7f GK |
117 | |
118 | return 0; | |
7bfc759c CLG |
119 | } |
120 | ||
121 | typedef struct { | |
122 | XiveTCTX *tctx; | |
1118b6b7 GK |
123 | Error **errp; |
124 | int ret; | |
7bfc759c CLG |
125 | } XiveCpuGetState; |
126 | ||
127 | static void kvmppc_xive_cpu_do_synchronize_state(CPUState *cpu, | |
128 | run_on_cpu_data arg) | |
129 | { | |
130 | XiveCpuGetState *s = arg.host_ptr; | |
131 | ||
1118b6b7 | 132 | s->ret = kvmppc_xive_cpu_get_state(s->tctx, s->errp); |
7bfc759c CLG |
133 | } |
134 | ||
1118b6b7 | 135 | int kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp) |
7bfc759c CLG |
136 | { |
137 | XiveCpuGetState s = { | |
138 | .tctx = tctx, | |
1118b6b7 | 139 | .errp = errp, |
7bfc759c CLG |
140 | }; |
141 | ||
142 | /* | |
143 | * Kick the vCPU to make sure they are available for the KVM ioctl. | |
144 | */ | |
145 | run_on_cpu(tctx->cs, kvmppc_xive_cpu_do_synchronize_state, | |
146 | RUN_ON_CPU_HOST_PTR(&s)); | |
147 | ||
1118b6b7 | 148 | return s.ret; |
7bfc759c | 149 | } |
38afd772 | 150 | |
3885ca66 | 151 | int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp) |
38afd772 | 152 | { |
3885ca66 | 153 | ERRP_GUARD(); |
74e51a38 | 154 | SpaprXive *xive = SPAPR_XIVE(tctx->xptr); |
38afd772 CLG |
155 | unsigned long vcpu_id; |
156 | int ret; | |
157 | ||
a4907119 | 158 | assert(xive->fd != -1); |
3bf84e99 | 159 | |
38afd772 | 160 | /* Check if CPU was hot unplugged and replugged. */ |
6d24795e | 161 | if (kvm_cpu_is_enabled(tctx->cs)) { |
3885ca66 | 162 | return 0; |
38afd772 CLG |
163 | } |
164 | ||
165 | vcpu_id = kvm_arch_vcpu_id(tctx->cs); | |
166 | ||
4e960974 CLG |
167 | trace_kvm_xive_cpu_connect(vcpu_id); |
168 | ||
38afd772 CLG |
169 | ret = kvm_vcpu_enable_cap(tctx->cs, KVM_CAP_PPC_IRQ_XIVE, 0, xive->fd, |
170 | vcpu_id, 0); | |
171 | if (ret < 0) { | |
3885ca66 GK |
172 | error_setg_errno(errp, -ret, |
173 | "XIVE: unable to connect CPU%ld to KVM device", | |
174 | vcpu_id); | |
175 | if (ret == -ENOSPC) { | |
176 | error_append_hint(errp, "Try -smp maxcpus=N with N < %u\n", | |
74e51a38 | 177 | MACHINE(qdev_get_machine())->smp.max_cpus); |
74f23d43 | 178 | } |
3885ca66 | 179 | return ret; |
38afd772 CLG |
180 | } |
181 | ||
182 | kvm_cpu_enable(tctx->cs); | |
3885ca66 | 183 | return 0; |
38afd772 CLG |
184 | } |
185 | ||
186 | /* | |
187 | * XIVE Interrupt Source (KVM) | |
188 | */ | |
189 | ||
d55daadc GK |
190 | int kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS *eas, |
191 | Error **errp) | |
0c575703 CLG |
192 | { |
193 | uint32_t end_idx; | |
194 | uint32_t end_blk; | |
195 | uint8_t priority; | |
196 | uint32_t server; | |
197 | bool masked; | |
198 | uint32_t eisn; | |
199 | uint64_t kvm_src; | |
0c575703 CLG |
200 | |
201 | assert(xive_eas_is_valid(eas)); | |
202 | ||
203 | end_idx = xive_get_field64(EAS_END_INDEX, eas->w); | |
204 | end_blk = xive_get_field64(EAS_END_BLOCK, eas->w); | |
205 | eisn = xive_get_field64(EAS_END_DATA, eas->w); | |
206 | masked = xive_eas_is_masked(eas); | |
207 | ||
208 | spapr_xive_end_to_target(end_blk, end_idx, &server, &priority); | |
209 | ||
210 | kvm_src = priority << KVM_XIVE_SOURCE_PRIORITY_SHIFT & | |
211 | KVM_XIVE_SOURCE_PRIORITY_MASK; | |
212 | kvm_src |= server << KVM_XIVE_SOURCE_SERVER_SHIFT & | |
213 | KVM_XIVE_SOURCE_SERVER_MASK; | |
214 | kvm_src |= ((uint64_t) masked << KVM_XIVE_SOURCE_MASKED_SHIFT) & | |
215 | KVM_XIVE_SOURCE_MASKED_MASK; | |
216 | kvm_src |= ((uint64_t)eisn << KVM_XIVE_SOURCE_EISN_SHIFT) & | |
217 | KVM_XIVE_SOURCE_EISN_MASK; | |
218 | ||
d55daadc GK |
219 | return kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE_CONFIG, lisn, |
220 | &kvm_src, true, errp); | |
0c575703 CLG |
221 | } |
222 | ||
223 | void kvmppc_xive_sync_source(SpaprXive *xive, uint32_t lisn, Error **errp) | |
224 | { | |
225 | kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE_SYNC, lisn, | |
226 | NULL, true, errp); | |
227 | } | |
228 | ||
38afd772 CLG |
229 | /* |
230 | * At reset, the interrupt sources are simply created and MASKED. We | |
231 | * only need to inform the KVM XIVE device about their type: LSI or | |
232 | * MSI. | |
233 | */ | |
e594c2ad | 234 | int kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp) |
38afd772 CLG |
235 | { |
236 | SpaprXive *xive = SPAPR_XIVE(xsrc->xive); | |
237 | uint64_t state = 0; | |
238 | ||
179abc1f CLG |
239 | trace_kvm_xive_source_reset(srcno); |
240 | ||
a4907119 | 241 | assert(xive->fd != -1); |
3bf84e99 | 242 | |
38afd772 CLG |
243 | if (xive_source_irq_is_lsi(xsrc, srcno)) { |
244 | state |= KVM_XIVE_LEVEL_SENSITIVE; | |
621f70d2 | 245 | if (xive_source_is_asserted(xsrc, srcno)) { |
38afd772 CLG |
246 | state |= KVM_XIVE_LEVEL_ASSERTED; |
247 | } | |
248 | } | |
249 | ||
e594c2ad DG |
250 | return kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE, srcno, &state, |
251 | true, errp); | |
38afd772 CLG |
252 | } |
253 | ||
46407a25 | 254 | static int kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp) |
38afd772 | 255 | { |
4c3539d4 | 256 | SpaprXive *xive = SPAPR_XIVE(xsrc->xive); |
38afd772 CLG |
257 | int i; |
258 | ||
6d24795e | 259 | for (i = 0; i < xsrc->nr_irqs; i++) { |
46407a25 | 260 | int ret; |
38afd772 | 261 | |
4c3539d4 CLG |
262 | if (!xive_eas_is_valid(&xive->eat[i])) { |
263 | continue; | |
264 | } | |
265 | ||
46407a25 GK |
266 | ret = kvmppc_xive_source_reset_one(xsrc, i, errp); |
267 | if (ret < 0) { | |
268 | return ret; | |
38afd772 CLG |
269 | } |
270 | } | |
46407a25 GK |
271 | |
272 | return 0; | |
38afd772 CLG |
273 | } |
274 | ||
0c575703 CLG |
275 | /* |
276 | * This is used to perform the magic loads on the ESB pages, described | |
277 | * in xive.h. | |
278 | * | |
279 | * Memory barriers should not be needed for loads (no store for now). | |
280 | */ | |
281 | static uint64_t xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset, | |
282 | uint64_t data, bool write) | |
283 | { | |
284 | uint64_t *addr = xsrc->esb_mmap + xive_source_esb_mgmt(xsrc, srcno) + | |
285 | offset; | |
286 | ||
287 | if (write) { | |
288 | *addr = cpu_to_be64(data); | |
289 | return -1; | |
290 | } else { | |
291 | /* Prevent the compiler from optimizing away the load */ | |
292 | volatile uint64_t value = be64_to_cpu(*addr); | |
293 | return value; | |
294 | } | |
295 | } | |
296 | ||
297 | static uint8_t xive_esb_read(XiveSource *xsrc, int srcno, uint32_t offset) | |
298 | { | |
299 | return xive_esb_rw(xsrc, srcno, offset, 0, 0) & 0x3; | |
300 | } | |
301 | ||
fb8dc327 | 302 | static void kvmppc_xive_esb_trigger(XiveSource *xsrc, int srcno) |
0c575703 | 303 | { |
644c6869 | 304 | xive_esb_rw(xsrc, srcno, 0, 0, true); |
0c575703 CLG |
305 | } |
306 | ||
307 | uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset, | |
308 | uint64_t data, bool write) | |
309 | { | |
310 | if (write) { | |
311 | return xive_esb_rw(xsrc, srcno, offset, data, 1); | |
312 | } | |
313 | ||
314 | /* | |
315 | * Special Load EOI handling for LSI sources. Q bit is never set | |
316 | * and the interrupt should be re-triggered if the level is still | |
317 | * asserted. | |
318 | */ | |
319 | if (xive_source_irq_is_lsi(xsrc, srcno) && | |
320 | offset == XIVE_ESB_LOAD_EOI) { | |
321 | xive_esb_read(xsrc, srcno, XIVE_ESB_SET_PQ_00); | |
621f70d2 | 322 | if (xive_source_is_asserted(xsrc, srcno)) { |
fb8dc327 | 323 | kvmppc_xive_esb_trigger(xsrc, srcno); |
0c575703 CLG |
324 | } |
325 | return 0; | |
326 | } else { | |
327 | return xive_esb_rw(xsrc, srcno, offset, 0, 0); | |
328 | } | |
329 | } | |
330 | ||
7bfc759c CLG |
331 | static void kvmppc_xive_source_get_state(XiveSource *xsrc) |
332 | { | |
4c3539d4 | 333 | SpaprXive *xive = SPAPR_XIVE(xsrc->xive); |
7bfc759c CLG |
334 | int i; |
335 | ||
336 | for (i = 0; i < xsrc->nr_irqs; i++) { | |
4c3539d4 CLG |
337 | uint8_t pq; |
338 | ||
6d24795e | 339 | if (!xive_eas_is_valid(&xive->eat[i])) { |
4c3539d4 CLG |
340 | continue; |
341 | } | |
342 | ||
7bfc759c | 343 | /* Perform a load without side effect to retrieve the PQ bits */ |
4c3539d4 | 344 | pq = xive_esb_read(xsrc, i, XIVE_ESB_GET); |
7bfc759c CLG |
345 | |
346 | /* and save PQ locally */ | |
347 | xive_source_esb_set(xsrc, i, pq); | |
348 | } | |
349 | } | |
350 | ||
38afd772 CLG |
351 | void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val) |
352 | { | |
353 | XiveSource *xsrc = opaque; | |
3bf84e99 | 354 | |
38afd772 CLG |
355 | if (!xive_source_irq_is_lsi(xsrc, srcno)) { |
356 | if (!val) { | |
357 | return; | |
358 | } | |
38afd772 | 359 | } else { |
621f70d2 | 360 | xive_source_set_asserted(xsrc, srcno, val); |
38afd772 | 361 | } |
58246041 | 362 | |
fb8dc327 | 363 | kvmppc_xive_esb_trigger(xsrc, srcno); |
38afd772 CLG |
364 | } |
365 | ||
366 | /* | |
367 | * sPAPR XIVE interrupt controller (KVM) | |
368 | */ | |
f9a548ed GK |
369 | int kvmppc_xive_get_queue_config(SpaprXive *xive, uint8_t end_blk, |
370 | uint32_t end_idx, XiveEND *end, | |
371 | Error **errp) | |
0c575703 CLG |
372 | { |
373 | struct kvm_ppc_xive_eq kvm_eq = { 0 }; | |
374 | uint64_t kvm_eq_idx; | |
375 | uint8_t priority; | |
376 | uint32_t server; | |
f9a548ed | 377 | int ret; |
0c575703 CLG |
378 | |
379 | assert(xive_end_is_valid(end)); | |
380 | ||
381 | /* Encode the tuple (server, prio) as a KVM EQ index */ | |
382 | spapr_xive_end_to_target(end_blk, end_idx, &server, &priority); | |
383 | ||
384 | kvm_eq_idx = priority << KVM_XIVE_EQ_PRIORITY_SHIFT & | |
385 | KVM_XIVE_EQ_PRIORITY_MASK; | |
386 | kvm_eq_idx |= server << KVM_XIVE_EQ_SERVER_SHIFT & | |
387 | KVM_XIVE_EQ_SERVER_MASK; | |
388 | ||
f9a548ed GK |
389 | ret = kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ_CONFIG, kvm_eq_idx, |
390 | &kvm_eq, false, errp); | |
391 | if (ret < 0) { | |
392 | return ret; | |
0c575703 CLG |
393 | } |
394 | ||
395 | /* | |
396 | * The EQ index and toggle bit are updated by HW. These are the | |
397 | * only fields from KVM we want to update QEMU with. The other END | |
398 | * fields should already be in the QEMU END table. | |
399 | */ | |
400 | end->w1 = xive_set_field32(END_W1_GENERATION, 0ul, kvm_eq.qtoggle) | | |
401 | xive_set_field32(END_W1_PAGE_OFF, 0ul, kvm_eq.qindex); | |
f9a548ed GK |
402 | |
403 | return 0; | |
0c575703 CLG |
404 | } |
405 | ||
f9a548ed GK |
406 | int kvmppc_xive_set_queue_config(SpaprXive *xive, uint8_t end_blk, |
407 | uint32_t end_idx, XiveEND *end, | |
408 | Error **errp) | |
0c575703 CLG |
409 | { |
410 | struct kvm_ppc_xive_eq kvm_eq = { 0 }; | |
411 | uint64_t kvm_eq_idx; | |
412 | uint8_t priority; | |
413 | uint32_t server; | |
0c575703 CLG |
414 | |
415 | /* | |
416 | * Build the KVM state from the local END structure. | |
417 | */ | |
418 | ||
419 | kvm_eq.flags = 0; | |
420 | if (xive_get_field32(END_W0_UCOND_NOTIFY, end->w0)) { | |
421 | kvm_eq.flags |= KVM_XIVE_EQ_ALWAYS_NOTIFY; | |
422 | } | |
423 | ||
424 | /* | |
425 | * If the hcall is disabling the EQ, set the size and page address | |
426 | * to zero. When migrating, only valid ENDs are taken into | |
427 | * account. | |
428 | */ | |
429 | if (xive_end_is_valid(end)) { | |
430 | kvm_eq.qshift = xive_get_field32(END_W0_QSIZE, end->w0) + 12; | |
431 | kvm_eq.qaddr = xive_end_qaddr(end); | |
432 | /* | |
433 | * The EQ toggle bit and index should only be relevant when | |
434 | * restoring the EQ state | |
435 | */ | |
436 | kvm_eq.qtoggle = xive_get_field32(END_W1_GENERATION, end->w1); | |
437 | kvm_eq.qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1); | |
438 | } else { | |
439 | kvm_eq.qshift = 0; | |
440 | kvm_eq.qaddr = 0; | |
441 | } | |
442 | ||
443 | /* Encode the tuple (server, prio) as a KVM EQ index */ | |
444 | spapr_xive_end_to_target(end_blk, end_idx, &server, &priority); | |
445 | ||
446 | kvm_eq_idx = priority << KVM_XIVE_EQ_PRIORITY_SHIFT & | |
447 | KVM_XIVE_EQ_PRIORITY_MASK; | |
448 | kvm_eq_idx |= server << KVM_XIVE_EQ_SERVER_SHIFT & | |
449 | KVM_XIVE_EQ_SERVER_MASK; | |
450 | ||
f9a548ed GK |
451 | return |
452 | kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ_CONFIG, kvm_eq_idx, | |
453 | &kvm_eq, true, errp); | |
0c575703 CLG |
454 | } |
455 | ||
456 | void kvmppc_xive_reset(SpaprXive *xive, Error **errp) | |
457 | { | |
458 | kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_CTRL, KVM_DEV_XIVE_RESET, | |
459 | NULL, true, errp); | |
460 | } | |
38afd772 | 461 | |
d53482a7 | 462 | static int kvmppc_xive_get_queues(SpaprXive *xive, Error **errp) |
7bfc759c | 463 | { |
7bfc759c | 464 | int i; |
d53482a7 | 465 | int ret; |
7bfc759c CLG |
466 | |
467 | for (i = 0; i < xive->nr_ends; i++) { | |
468 | if (!xive_end_is_valid(&xive->endt[i])) { | |
469 | continue; | |
470 | } | |
471 | ||
d53482a7 GK |
472 | ret = kvmppc_xive_get_queue_config(xive, SPAPR_XIVE_BLOCK_ID, i, |
473 | &xive->endt[i], errp); | |
474 | if (ret < 0) { | |
475 | return ret; | |
7bfc759c CLG |
476 | } |
477 | } | |
d53482a7 GK |
478 | |
479 | return 0; | |
7bfc759c CLG |
480 | } |
481 | ||
9b88cd76 CLG |
482 | /* |
483 | * The primary goal of the XIVE VM change handler is to mark the EQ | |
484 | * pages dirty when all XIVE event notifications have stopped. | |
485 | * | |
486 | * Whenever the VM is stopped, the VM change handler sets the source | |
487 | * PQs to PENDING to stop the flow of events and to possibly catch a | |
9b4b4e51 | 488 | * triggered interrupt occurring while the VM is stopped. The previous |
9b88cd76 CLG |
489 | * state is saved in anticipation of a migration. The XIVE controller |
490 | * is then synced through KVM to flush any in-flight event | |
491 | * notification and stabilize the EQs. | |
492 | * | |
493 | * At this stage, we can mark the EQ page dirty and let a migration | |
494 | * sequence transfer the EQ pages to the destination, which is done | |
495 | * just after the stop state. | |
496 | * | |
497 | * The previous configuration of the sources is restored when the VM | |
498 | * runs again. If an interrupt was queued while the VM was stopped, | |
499 | * simply generate a trigger. | |
500 | */ | |
538f0497 | 501 | static void kvmppc_xive_change_state_handler(void *opaque, bool running, |
9b88cd76 CLG |
502 | RunState state) |
503 | { | |
504 | SpaprXive *xive = opaque; | |
505 | XiveSource *xsrc = &xive->source; | |
506 | Error *local_err = NULL; | |
507 | int i; | |
508 | ||
509 | /* | |
510 | * Restore the sources to their initial state. This is called when | |
511 | * the VM resumes after a stop or a migration. | |
512 | */ | |
513 | if (running) { | |
514 | for (i = 0; i < xsrc->nr_irqs; i++) { | |
4c3539d4 | 515 | uint8_t pq; |
9b88cd76 CLG |
516 | uint8_t old_pq; |
517 | ||
6d24795e | 518 | if (!xive_eas_is_valid(&xive->eat[i])) { |
4c3539d4 CLG |
519 | continue; |
520 | } | |
521 | ||
522 | pq = xive_source_esb_get(xsrc, i); | |
9b88cd76 CLG |
523 | old_pq = xive_esb_read(xsrc, i, XIVE_ESB_SET_PQ_00 + (pq << 8)); |
524 | ||
525 | /* | |
526 | * An interrupt was queued while the VM was stopped, | |
527 | * generate a trigger. | |
528 | */ | |
529 | if (pq == XIVE_ESB_RESET && old_pq == XIVE_ESB_QUEUED) { | |
fb8dc327 | 530 | kvmppc_xive_esb_trigger(xsrc, i); |
9b88cd76 CLG |
531 | } |
532 | } | |
533 | ||
534 | return; | |
535 | } | |
536 | ||
537 | /* | |
538 | * Mask the sources, to stop the flow of event notifications, and | |
539 | * save the PQs locally in the XiveSource object. The XiveSource | |
540 | * state will be collected later on by its vmstate handler if a | |
541 | * migration is in progress. | |
542 | */ | |
543 | for (i = 0; i < xsrc->nr_irqs; i++) { | |
4c3539d4 CLG |
544 | uint8_t pq; |
545 | ||
6d24795e | 546 | if (!xive_eas_is_valid(&xive->eat[i])) { |
4c3539d4 CLG |
547 | continue; |
548 | } | |
549 | ||
550 | pq = xive_esb_read(xsrc, i, XIVE_ESB_GET); | |
9b88cd76 CLG |
551 | |
552 | /* | |
553 | * PQ is set to PENDING to possibly catch a triggered | |
9b4b4e51 | 554 | * interrupt occurring while the VM is stopped (hotplug event |
9b88cd76 CLG |
555 | * for instance) . |
556 | */ | |
557 | if (pq != XIVE_ESB_OFF) { | |
558 | pq = xive_esb_read(xsrc, i, XIVE_ESB_SET_PQ_10); | |
559 | } | |
560 | xive_source_esb_set(xsrc, i, pq); | |
561 | } | |
562 | ||
563 | /* | |
564 | * Sync the XIVE controller in KVM, to flush in-flight event | |
565 | * notification that should be enqueued in the EQs and mark the | |
566 | * XIVE EQ pages dirty to collect all updates. | |
567 | */ | |
568 | kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_CTRL, | |
569 | KVM_DEV_XIVE_EQ_SYNC, NULL, true, &local_err); | |
570 | if (local_err) { | |
571 | error_report_err(local_err); | |
572 | return; | |
573 | } | |
574 | } | |
575 | ||
7bfc759c CLG |
576 | void kvmppc_xive_synchronize_state(SpaprXive *xive, Error **errp) |
577 | { | |
a4907119 | 578 | assert(xive->fd != -1); |
3bf84e99 | 579 | |
9b88cd76 CLG |
580 | /* |
581 | * When the VM is stopped, the sources are masked and the previous | |
582 | * state is saved in anticipation of a migration. We should not | |
583 | * synchronize the source state in that case else we will override | |
584 | * the saved state. | |
585 | */ | |
586 | if (runstate_is_running()) { | |
587 | kvmppc_xive_source_get_state(&xive->source); | |
588 | } | |
7bfc759c CLG |
589 | |
590 | /* EAT: there is no extra state to query from KVM */ | |
591 | ||
592 | /* ENDT */ | |
593 | kvmppc_xive_get_queues(xive, errp); | |
594 | } | |
595 | ||
277dd3d7 CLG |
596 | /* |
597 | * The SpaprXive 'pre_save' method is called by the vmstate handler of | |
598 | * the SpaprXive model, after the XIVE controller is synced in the VM | |
599 | * change handler. | |
600 | */ | |
601 | int kvmppc_xive_pre_save(SpaprXive *xive) | |
602 | { | |
603 | Error *local_err = NULL; | |
42a92d92 | 604 | int ret; |
277dd3d7 | 605 | |
a4907119 | 606 | assert(xive->fd != -1); |
3bf84e99 | 607 | |
277dd3d7 CLG |
608 | /* EAT: there is no extra state to query from KVM */ |
609 | ||
610 | /* ENDT */ | |
42a92d92 GK |
611 | ret = kvmppc_xive_get_queues(xive, &local_err); |
612 | if (ret < 0) { | |
277dd3d7 | 613 | error_report_err(local_err); |
42a92d92 | 614 | return ret; |
277dd3d7 CLG |
615 | } |
616 | ||
617 | return 0; | |
618 | } | |
619 | ||
620 | /* | |
621 | * The SpaprXive 'post_load' method is not called by a vmstate | |
622 | * handler. It is called at the sPAPR machine level at the end of the | |
623 | * migration sequence by the sPAPR IRQ backend 'post_load' method, | |
624 | * when all XIVE states have been transferred and loaded. | |
625 | */ | |
626 | int kvmppc_xive_post_load(SpaprXive *xive, int version_id) | |
627 | { | |
628 | Error *local_err = NULL; | |
629 | CPUState *cs; | |
630 | int i; | |
a845a54c | 631 | int ret; |
277dd3d7 | 632 | |
3bf84e99 CLG |
633 | /* The KVM XIVE device should be in use */ |
634 | assert(xive->fd != -1); | |
635 | ||
9b4b4e51 | 636 | /* Restore the ENDT first. The targeting depends on it. */ |
277dd3d7 CLG |
637 | for (i = 0; i < xive->nr_ends; i++) { |
638 | if (!xive_end_is_valid(&xive->endt[i])) { | |
639 | continue; | |
640 | } | |
641 | ||
a845a54c GK |
642 | ret = kvmppc_xive_set_queue_config(xive, SPAPR_XIVE_BLOCK_ID, i, |
643 | &xive->endt[i], &local_err); | |
644 | if (ret < 0) { | |
645 | goto fail; | |
277dd3d7 CLG |
646 | } |
647 | } | |
648 | ||
649 | /* Restore the EAT */ | |
650 | for (i = 0; i < xive->nr_irqs; i++) { | |
6d24795e | 651 | if (!xive_eas_is_valid(&xive->eat[i])) { |
277dd3d7 CLG |
652 | continue; |
653 | } | |
654 | ||
6d24795e GK |
655 | /* |
656 | * We can only restore the source config if the source has been | |
657 | * previously set in KVM. Since we don't do that for all interrupts | |
658 | * at reset time anymore, let's do it now. | |
659 | */ | |
660 | ret = kvmppc_xive_source_reset_one(&xive->source, i, &local_err); | |
661 | if (ret < 0) { | |
662 | goto fail; | |
663 | } | |
664 | ||
a845a54c GK |
665 | ret = kvmppc_xive_set_source_config(xive, i, &xive->eat[i], &local_err); |
666 | if (ret < 0) { | |
667 | goto fail; | |
277dd3d7 CLG |
668 | } |
669 | } | |
670 | ||
310cda5b CLG |
671 | /* |
672 | * Restore the thread interrupt contexts of initial CPUs. | |
673 | * | |
674 | * The context of hotplugged CPUs is restored later, by the | |
675 | * 'post_load' handler of the XiveTCTX model because they are not | |
676 | * available at the time the SpaprXive 'post_load' method is | |
677 | * called. We can not restore the context of all CPUs in the | |
678 | * 'post_load' handler of XiveTCTX because the machine is not | |
679 | * necessarily connected to the KVM device at that time. | |
680 | */ | |
277dd3d7 CLG |
681 | CPU_FOREACH(cs) { |
682 | PowerPCCPU *cpu = POWERPC_CPU(cs); | |
683 | ||
a845a54c GK |
684 | ret = kvmppc_xive_cpu_set_state(spapr_cpu_state(cpu)->tctx, &local_err); |
685 | if (ret < 0) { | |
686 | goto fail; | |
277dd3d7 CLG |
687 | } |
688 | } | |
689 | ||
690 | /* The source states will be restored when the machine starts running */ | |
691 | return 0; | |
a845a54c GK |
692 | |
693 | fail: | |
694 | error_report_err(local_err); | |
695 | return ret; | |
277dd3d7 CLG |
696 | } |
697 | ||
b14adb4a | 698 | /* Returns MAP_FAILED on error and sets errno */ |
38afd772 CLG |
699 | static void *kvmppc_xive_mmap(SpaprXive *xive, int pgoff, size_t len, |
700 | Error **errp) | |
701 | { | |
702 | void *addr; | |
703 | uint32_t page_shift = 16; /* TODO: fix page_shift */ | |
704 | ||
705 | addr = mmap(NULL, len, PROT_WRITE | PROT_READ, MAP_SHARED, xive->fd, | |
706 | pgoff << page_shift); | |
707 | if (addr == MAP_FAILED) { | |
708 | error_setg_errno(errp, errno, "XIVE: unable to set memory mapping"); | |
38afd772 CLG |
709 | } |
710 | ||
711 | return addr; | |
712 | } | |
713 | ||
714 | /* | |
715 | * All the XIVE memory regions are now backed by mappings from the KVM | |
716 | * XIVE device. | |
717 | */ | |
4ffb7496 GK |
718 | int kvmppc_xive_connect(SpaprInterruptController *intc, uint32_t nr_servers, |
719 | Error **errp) | |
38afd772 | 720 | { |
98a39a79 | 721 | SpaprXive *xive = SPAPR_XIVE(intc); |
38afd772 | 722 | XiveSource *xsrc = &xive->source; |
cf36e5b3 | 723 | size_t esb_len = xive_source_esb_len(xsrc); |
38afd772 | 724 | size_t tima_len = 4ull << TM_SHIFT; |
3f777abc | 725 | CPUState *cs; |
82f086b5 | 726 | int fd; |
b14adb4a | 727 | void *addr; |
6cdc0e20 | 728 | int ret; |
3f777abc CLG |
729 | |
730 | /* | |
731 | * The KVM XIVE device already in use. This is the case when | |
732 | * rebooting under the XIVE-only interrupt mode. | |
733 | */ | |
734 | if (xive->fd != -1) { | |
98a39a79 | 735 | return 0; |
3f777abc | 736 | } |
38afd772 CLG |
737 | |
738 | if (!kvmppc_has_cap_xive()) { | |
739 | error_setg(errp, "IRQ_XIVE capability must be present for KVM"); | |
98a39a79 | 740 | return -1; |
38afd772 CLG |
741 | } |
742 | ||
743 | /* First, create the KVM XIVE device */ | |
82f086b5 GK |
744 | fd = kvm_create_device(kvm_state, KVM_DEV_TYPE_XIVE, false); |
745 | if (fd < 0) { | |
746 | error_setg_errno(errp, -fd, "XIVE: error creating KVM device"); | |
98a39a79 | 747 | return -1; |
38afd772 | 748 | } |
82f086b5 | 749 | xive->fd = fd; |
38afd772 | 750 | |
74f23d43 GK |
751 | /* Tell KVM about the # of VCPUs we may have */ |
752 | if (kvm_device_check_attr(xive->fd, KVM_DEV_XIVE_GRP_CTRL, | |
753 | KVM_DEV_XIVE_NR_SERVERS)) { | |
6cdc0e20 GK |
754 | ret = kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_CTRL, |
755 | KVM_DEV_XIVE_NR_SERVERS, &nr_servers, true, | |
756 | errp); | |
757 | if (ret < 0) { | |
74f23d43 GK |
758 | goto fail; |
759 | } | |
760 | } | |
761 | ||
38afd772 CLG |
762 | /* |
763 | * 1. Source ESB pages - KVM mapping | |
764 | */ | |
6cdc0e20 | 765 | addr = kvmppc_xive_mmap(xive, KVM_XIVE_ESB_PAGE_OFFSET, esb_len, errp); |
b14adb4a | 766 | if (addr == MAP_FAILED) { |
1c3d4a8f | 767 | goto fail; |
38afd772 | 768 | } |
b14adb4a | 769 | xsrc->esb_mmap = addr; |
38afd772 | 770 | |
981b1c62 | 771 | memory_region_init_ram_device_ptr(&xsrc->esb_mmio_kvm, OBJECT(xsrc), |
cf36e5b3 | 772 | "xive.esb-kvm", esb_len, xsrc->esb_mmap); |
981b1c62 CLG |
773 | memory_region_add_subregion_overlap(&xsrc->esb_mmio, 0, |
774 | &xsrc->esb_mmio_kvm, 1); | |
38afd772 CLG |
775 | |
776 | /* | |
777 | * 2. END ESB pages (No KVM support yet) | |
778 | */ | |
38afd772 CLG |
779 | |
780 | /* | |
781 | * 3. TIMA pages - KVM mapping | |
782 | */ | |
6cdc0e20 | 783 | addr = kvmppc_xive_mmap(xive, KVM_XIVE_TIMA_PAGE_OFFSET, tima_len, errp); |
b14adb4a | 784 | if (addr == MAP_FAILED) { |
1c3d4a8f | 785 | goto fail; |
38afd772 | 786 | } |
b14adb4a GK |
787 | xive->tm_mmap = addr; |
788 | ||
981b1c62 | 789 | memory_region_init_ram_device_ptr(&xive->tm_mmio_kvm, OBJECT(xive), |
38afd772 | 790 | "xive.tima", tima_len, xive->tm_mmap); |
981b1c62 CLG |
791 | memory_region_add_subregion_overlap(&xive->tm_mmio, 0, |
792 | &xive->tm_mmio_kvm, 1); | |
38afd772 | 793 | |
9b88cd76 CLG |
794 | xive->change = qemu_add_vm_change_state_handler( |
795 | kvmppc_xive_change_state_handler, xive); | |
796 | ||
3f777abc CLG |
797 | /* Connect the presenters to the initial VCPUs of the machine */ |
798 | CPU_FOREACH(cs) { | |
799 | PowerPCCPU *cpu = POWERPC_CPU(cs); | |
800 | ||
6cdc0e20 GK |
801 | ret = kvmppc_xive_cpu_connect(spapr_cpu_state(cpu)->tctx, errp); |
802 | if (ret < 0) { | |
1c3d4a8f | 803 | goto fail; |
3f777abc CLG |
804 | } |
805 | } | |
806 | ||
807 | /* Update the KVM sources */ | |
6cdc0e20 GK |
808 | ret = kvmppc_xive_source_reset(xsrc, errp); |
809 | if (ret < 0) { | |
1c3d4a8f | 810 | goto fail; |
3f777abc CLG |
811 | } |
812 | ||
38afd772 CLG |
813 | kvm_kernel_irqchip = true; |
814 | kvm_msi_via_irqfd_allowed = true; | |
815 | kvm_gsi_direct_mapping = true; | |
98a39a79 | 816 | return 0; |
1c3d4a8f GK |
817 | |
818 | fail: | |
98a39a79 DG |
819 | kvmppc_xive_disconnect(intc); |
820 | return -1; | |
38afd772 | 821 | } |
56b11587 | 822 | |
98a39a79 | 823 | void kvmppc_xive_disconnect(SpaprInterruptController *intc) |
56b11587 | 824 | { |
98a39a79 | 825 | SpaprXive *xive = SPAPR_XIVE(intc); |
56b11587 CLG |
826 | XiveSource *xsrc; |
827 | size_t esb_len; | |
828 | ||
a4907119 | 829 | assert(xive->fd != -1); |
56b11587 | 830 | |
56b11587 CLG |
831 | /* Clear the KVM mapping */ |
832 | xsrc = &xive->source; | |
3110f0ee | 833 | esb_len = xive_source_esb_len(xsrc); |
56b11587 | 834 | |
1c3d4a8f GK |
835 | if (xsrc->esb_mmap) { |
836 | memory_region_del_subregion(&xsrc->esb_mmio, &xsrc->esb_mmio_kvm); | |
837 | object_unparent(OBJECT(&xsrc->esb_mmio_kvm)); | |
838 | munmap(xsrc->esb_mmap, esb_len); | |
839 | xsrc->esb_mmap = NULL; | |
840 | } | |
56b11587 | 841 | |
1c3d4a8f GK |
842 | if (xive->tm_mmap) { |
843 | memory_region_del_subregion(&xive->tm_mmio, &xive->tm_mmio_kvm); | |
844 | object_unparent(OBJECT(&xive->tm_mmio_kvm)); | |
845 | munmap(xive->tm_mmap, 4ull << TM_SHIFT); | |
846 | xive->tm_mmap = NULL; | |
847 | } | |
56b11587 CLG |
848 | |
849 | /* | |
850 | * When the KVM device fd is closed, the KVM device is destroyed | |
851 | * and removed from the list of devices of the VM. The VCPU | |
852 | * presenters are also detached from the device. | |
853 | */ | |
e7811395 GK |
854 | close(xive->fd); |
855 | xive->fd = -1; | |
56b11587 CLG |
856 | |
857 | kvm_kernel_irqchip = false; | |
858 | kvm_msi_via_irqfd_allowed = false; | |
859 | kvm_gsi_direct_mapping = false; | |
860 | ||
861 | /* Clear the local list of presenter (hotplug) */ | |
862 | kvm_cpu_disable_all(); | |
863 | ||
864 | /* VM Change state handler is not needed anymore */ | |
1c3d4a8f GK |
865 | if (xive->change) { |
866 | qemu_del_vm_change_state_handler(xive->change); | |
867 | xive->change = NULL; | |
868 | } | |
56b11587 | 869 | } |