/*
 * QEMU PowerPC sPAPR XIVE interrupt controller model
 *
 * Copyright (c) 2017-2019, IBM Corporation.
 *
 * This code is licensed under the GPL version 2 or later. See the
 * COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "qemu/log.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "target/ppc/cpu.h"
#include "sysemu/cpus.h"
#include "sysemu/kvm.h"
#include "sysemu/runstate.h"
#include "hw/ppc/spapr.h"
#include "hw/ppc/spapr_cpu_core.h"
#include "hw/ppc/spapr_xive.h"
#include "hw/ppc/xive.h"
#include "kvm_ppc.h"

#include <sys/ioctl.h>

/*
 * Helpers for CPU hotplug
 *
 * TODO: make a common KVMEnabledCPU layer for XICS and XIVE
 */
typedef struct KVMEnabledCPU {
    unsigned long vcpu_id;
    QLIST_ENTRY(KVMEnabledCPU) node;
} KVMEnabledCPU;

static QLIST_HEAD(, KVMEnabledCPU)
    kvm_enabled_cpus = QLIST_HEAD_INITIALIZER(&kvm_enabled_cpus);

static bool kvm_cpu_is_enabled(CPUState *cs)
{
    KVMEnabledCPU *enabled_cpu;
    unsigned long vcpu_id = kvm_arch_vcpu_id(cs);

    QLIST_FOREACH(enabled_cpu, &kvm_enabled_cpus, node) {
        if (enabled_cpu->vcpu_id == vcpu_id) {
            return true;
        }
    }
    return false;
}

static void kvm_cpu_enable(CPUState *cs)
{
    KVMEnabledCPU *enabled_cpu;
    unsigned long vcpu_id = kvm_arch_vcpu_id(cs);

    enabled_cpu = g_malloc(sizeof(*enabled_cpu));
    enabled_cpu->vcpu_id = vcpu_id;
    QLIST_INSERT_HEAD(&kvm_enabled_cpus, enabled_cpu, node);
}

static void kvm_cpu_disable_all(void)
{
    KVMEnabledCPU *enabled_cpu, *next;

    QLIST_FOREACH_SAFE(enabled_cpu, &kvm_enabled_cpus, node, next) {
        QLIST_REMOVE(enabled_cpu, node);
        g_free(enabled_cpu);
    }
}

/*
 * XIVE Thread Interrupt Management context (KVM)
 */

void kvmppc_xive_cpu_set_state(XiveTCTX *tctx, Error **errp)
{
    SpaprXive *xive = SPAPR_MACHINE(qdev_get_machine())->xive;
    uint64_t state[2];
    int ret;

    /* The KVM XIVE device is not in use yet */
    if (xive->fd == -1) {
        return;
    }

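    /*
     * KVM_REG_PPC_VP_STATE is a 128-bit KVM register; only its first
     * 64-bit word, holding the OS ring state, is used here.
     */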
    /* word0 and word1 of the OS ring. */
    state[0] = *((uint64_t *) &tctx->regs[TM_QW1_OS]);

    ret = kvm_set_one_reg(tctx->cs, KVM_REG_PPC_VP_STATE, state);
    if (ret != 0) {
        error_setg_errno(errp, errno,
                         "XIVE: could not restore KVM state of CPU %ld",
                         kvm_arch_vcpu_id(tctx->cs));
    }
}

void kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp)
{
    SpaprXive *xive = SPAPR_MACHINE(qdev_get_machine())->xive;
    uint64_t state[2] = { 0 };
    int ret;

    /* The KVM XIVE device is not in use */
    if (xive->fd == -1) {
        return;
    }

    ret = kvm_get_one_reg(tctx->cs, KVM_REG_PPC_VP_STATE, state);
    if (ret != 0) {
        error_setg_errno(errp, errno,
                         "XIVE: could not capture KVM state of CPU %ld",
                         kvm_arch_vcpu_id(tctx->cs));
        return;
    }

    /* word0 and word1 of the OS ring. */
    *((uint64_t *) &tctx->regs[TM_QW1_OS]) = state[0];
}

typedef struct {
    XiveTCTX *tctx;
    Error *err;
} XiveCpuGetState;

static void kvmppc_xive_cpu_do_synchronize_state(CPUState *cpu,
                                                 run_on_cpu_data arg)
{
    XiveCpuGetState *s = arg.host_ptr;

    kvmppc_xive_cpu_get_state(s->tctx, &s->err);
}

void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp)
{
    XiveCpuGetState s = {
        .tctx = tctx,
        .err = NULL,
    };

    /*
     * Kick the vCPU to make sure it is available for the KVM ioctl.
     */
    run_on_cpu(tctx->cs, kvmppc_xive_cpu_do_synchronize_state,
               RUN_ON_CPU_HOST_PTR(&s));

    if (s.err) {
        error_propagate(errp, s.err);
        return;
    }
}

void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    SpaprXive *xive = SPAPR_MACHINE(ms)->xive;
    unsigned long vcpu_id;
    int ret;

    /* The KVM XIVE device is not in use */
    if (xive->fd == -1) {
        return;
    }

    /* Check if CPU was hot unplugged and replugged. */
    if (kvm_cpu_is_enabled(tctx->cs)) {
        return;
    }

    vcpu_id = kvm_arch_vcpu_id(tctx->cs);

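    /*
     * The KVM_CAP_PPC_IRQ_XIVE capability connects the vCPU presenter
     * to the in-kernel XIVE device designated by its fd.
     */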
    ret = kvm_vcpu_enable_cap(tctx->cs, KVM_CAP_PPC_IRQ_XIVE, 0, xive->fd,
                              vcpu_id, 0);
    if (ret < 0) {
        Error *local_err = NULL;

        error_setg(&local_err,
                   "XIVE: unable to connect CPU%ld to KVM device: %s",
                   vcpu_id, strerror(errno));
        if (errno == ENOSPC) {
            error_append_hint(&local_err, "Try -smp maxcpus=N with N < %u\n",
                              ms->smp.max_cpus);
        }
        error_propagate(errp, local_err);
        return;
    }

    kvm_cpu_enable(tctx->cs);
}

/*
 * XIVE Interrupt Source (KVM)
 */

void kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS *eas,
                                   Error **errp)
{
    uint32_t end_idx;
    uint32_t end_blk;
    uint8_t priority;
    uint32_t server;
    bool masked;
    uint32_t eisn;
    uint64_t kvm_src;
    Error *local_err = NULL;

    assert(xive_eas_is_valid(eas));

    end_idx = xive_get_field64(EAS_END_INDEX, eas->w);
    end_blk = xive_get_field64(EAS_END_BLOCK, eas->w);
    eisn = xive_get_field64(EAS_END_DATA, eas->w);
    masked = xive_eas_is_masked(eas);

    spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);

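    /*
     * Pack the source configuration in a 64-bit word as defined by
     * the KVM uapi: priority in the low bits, then the server number,
     * the masked bit, and the EISN in the upper bits.
     */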
    kvm_src = priority << KVM_XIVE_SOURCE_PRIORITY_SHIFT &
              KVM_XIVE_SOURCE_PRIORITY_MASK;
    kvm_src |= server << KVM_XIVE_SOURCE_SERVER_SHIFT &
               KVM_XIVE_SOURCE_SERVER_MASK;
    kvm_src |= ((uint64_t) masked << KVM_XIVE_SOURCE_MASKED_SHIFT) &
               KVM_XIVE_SOURCE_MASKED_MASK;
    kvm_src |= ((uint64_t) eisn << KVM_XIVE_SOURCE_EISN_SHIFT) &
               KVM_XIVE_SOURCE_EISN_MASK;

    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE_CONFIG, lisn,
                      &kvm_src, true, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }
}

void kvmppc_xive_sync_source(SpaprXive *xive, uint32_t lisn, Error **errp)
{
    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE_SYNC, lisn,
                      NULL, true, errp);
}

/*
 * At reset, the interrupt sources are simply created and MASKED. We
 * only need to inform the KVM XIVE device about their type: LSI or
 * MSI.
 */
int kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp)
{
    SpaprXive *xive = SPAPR_XIVE(xsrc->xive);
    uint64_t state = 0;

    /* The KVM XIVE device is not in use */
    if (xive->fd == -1) {
        return -ENODEV;
    }

    if (xive_source_irq_is_lsi(xsrc, srcno)) {
        state |= KVM_XIVE_LEVEL_SENSITIVE;
        if (xsrc->status[srcno] & XIVE_STATUS_ASSERTED) {
            state |= KVM_XIVE_LEVEL_ASSERTED;
        }
    }

    return kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE, srcno, &state,
                             true, errp);
}

static void kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp)
{
    SpaprXive *xive = SPAPR_XIVE(xsrc->xive);
    int i;

    for (i = 0; i < xsrc->nr_irqs; i++) {
        Error *local_err = NULL;

        if (!xive_eas_is_valid(&xive->eat[i])) {
            continue;
        }

        kvmppc_xive_source_reset_one(xsrc, i, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    }
}

/*
 * This is used to perform the magic loads on the ESB pages, described
 * in xive.h.
 *
 * Memory barriers should not be needed for loads (no store for now).
 */
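/*
 * The load offset on the ESB management page selects the operation:
 * XIVE_ESB_GET returns the PQ bits without side effect, while the
 * XIVE_ESB_SET_PQ_* offsets atomically update PQ and return their
 * previous value.
 */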
static uint64_t xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset,
                            uint64_t data, bool write)
{
    uint64_t *addr = xsrc->esb_mmap + xive_source_esb_mgmt(xsrc, srcno) +
        offset;

    if (write) {
        *addr = cpu_to_be64(data);
        return -1;
    } else {
        /* Prevent the compiler from optimizing away the load */
        volatile uint64_t value = be64_to_cpu(*addr);
        return value;
    }
}

static uint8_t xive_esb_read(XiveSource *xsrc, int srcno, uint32_t offset)
{
    return xive_esb_rw(xsrc, srcno, offset, 0, 0) & 0x3;
}

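/*
 * A store of any value to the ESB trigger page fires the interrupt;
 * the data written is ignored.
 */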
static void xive_esb_trigger(XiveSource *xsrc, int srcno)
{
    uint64_t *addr = xsrc->esb_mmap + xive_source_esb_page(xsrc, srcno);

    *addr = 0x0;
}

uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset,
                            uint64_t data, bool write)
{
    if (write) {
        return xive_esb_rw(xsrc, srcno, offset, data, 1);
    }

    /*
     * Special Load EOI handling for LSI sources. Q bit is never set
     * and the interrupt should be re-triggered if the level is still
     * asserted.
     */
    if (xive_source_irq_is_lsi(xsrc, srcno) &&
        offset == XIVE_ESB_LOAD_EOI) {
        xive_esb_read(xsrc, srcno, XIVE_ESB_SET_PQ_00);
        if (xsrc->status[srcno] & XIVE_STATUS_ASSERTED) {
            xive_esb_trigger(xsrc, srcno);
        }
        return 0;
    } else {
        return xive_esb_rw(xsrc, srcno, offset, 0, 0);
    }
}

static void kvmppc_xive_source_get_state(XiveSource *xsrc)
{
    SpaprXive *xive = SPAPR_XIVE(xsrc->xive);
    int i;

    for (i = 0; i < xsrc->nr_irqs; i++) {
        uint8_t pq;

        if (!xive_eas_is_valid(&xive->eat[i])) {
            continue;
        }

        /* Perform a load without side effect to retrieve the PQ bits */
        pq = xive_esb_read(xsrc, i, XIVE_ESB_GET);

        /* and save PQ locally */
        xive_source_esb_set(xsrc, i, pq);
    }
}

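/*
 * MSIs are edge-sensitive: only an assertion (val != 0) generates a
 * trigger. For LSIs, the level is recorded in the QEMU source status
 * so that it can be migrated, before the event is forwarded to the
 * trigger page.
 */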
void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val)
{
    XiveSource *xsrc = opaque;

    if (!xive_source_irq_is_lsi(xsrc, srcno)) {
        if (!val) {
            return;
        }
    } else {
        if (val) {
            xsrc->status[srcno] |= XIVE_STATUS_ASSERTED;
        } else {
            xsrc->status[srcno] &= ~XIVE_STATUS_ASSERTED;
        }
    }

    xive_esb_trigger(xsrc, srcno);
}

/*
 * sPAPR XIVE interrupt controller (KVM)
 */
void kvmppc_xive_get_queue_config(SpaprXive *xive, uint8_t end_blk,
                                  uint32_t end_idx, XiveEND *end,
                                  Error **errp)
{
    struct kvm_ppc_xive_eq kvm_eq = { 0 };
    uint64_t kvm_eq_idx;
    uint8_t priority;
    uint32_t server;
    Error *local_err = NULL;

    assert(xive_end_is_valid(end));

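    /*
     * Per the KVM uapi, the EQ index packs the priority in its low 3
     * bits and the server number in the bits above.
     */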
    /* Encode the tuple (server, prio) as a KVM EQ index */
    spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);

    kvm_eq_idx = priority << KVM_XIVE_EQ_PRIORITY_SHIFT &
        KVM_XIVE_EQ_PRIORITY_MASK;
    kvm_eq_idx |= server << KVM_XIVE_EQ_SERVER_SHIFT &
        KVM_XIVE_EQ_SERVER_MASK;

    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ_CONFIG, kvm_eq_idx,
                      &kvm_eq, false, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    /*
     * The EQ index and toggle bit are updated by HW. These are the
     * only fields from KVM we want to update QEMU with. The other END
     * fields should already be in the QEMU END table.
     */
    end->w1 = xive_set_field32(END_W1_GENERATION, 0ul, kvm_eq.qtoggle) |
        xive_set_field32(END_W1_PAGE_OFF, 0ul, kvm_eq.qindex);
}

void kvmppc_xive_set_queue_config(SpaprXive *xive, uint8_t end_blk,
                                  uint32_t end_idx, XiveEND *end,
                                  Error **errp)
{
    struct kvm_ppc_xive_eq kvm_eq = { 0 };
    uint64_t kvm_eq_idx;
    uint8_t priority;
    uint32_t server;
    Error *local_err = NULL;

    /*
     * Build the KVM state from the local END structure.
     */

    kvm_eq.flags = 0;
    if (xive_get_field32(END_W0_UCOND_NOTIFY, end->w0)) {
        kvm_eq.flags |= KVM_XIVE_EQ_ALWAYS_NOTIFY;
    }

    /*
     * If the hcall is disabling the EQ, set the size and page address
     * to zero. When migrating, only valid ENDs are taken into
     * account.
     */
    if (xive_end_is_valid(end)) {
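        /*
         * END_W0_QSIZE encodes the queue size as a power of two of
         * 4K pages, hence the +12 to convert it to a byte shift.
         */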
        kvm_eq.qshift = xive_get_field32(END_W0_QSIZE, end->w0) + 12;
        kvm_eq.qaddr = xive_end_qaddr(end);
        /*
         * The EQ toggle bit and index should only be relevant when
         * restoring the EQ state
         */
        kvm_eq.qtoggle = xive_get_field32(END_W1_GENERATION, end->w1);
        kvm_eq.qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
    } else {
        kvm_eq.qshift = 0;
        kvm_eq.qaddr = 0;
    }

    /* Encode the tuple (server, prio) as a KVM EQ index */
    spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);

    kvm_eq_idx = priority << KVM_XIVE_EQ_PRIORITY_SHIFT &
        KVM_XIVE_EQ_PRIORITY_MASK;
    kvm_eq_idx |= server << KVM_XIVE_EQ_SERVER_SHIFT &
        KVM_XIVE_EQ_SERVER_MASK;

    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ_CONFIG, kvm_eq_idx,
                      &kvm_eq, true, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }
}

void kvmppc_xive_reset(SpaprXive *xive, Error **errp)
{
    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_CTRL, KVM_DEV_XIVE_RESET,
                      NULL, true, errp);
}

static void kvmppc_xive_get_queues(SpaprXive *xive, Error **errp)
{
    Error *local_err = NULL;
    int i;

    for (i = 0; i < xive->nr_ends; i++) {
        if (!xive_end_is_valid(&xive->endt[i])) {
            continue;
        }

        kvmppc_xive_get_queue_config(xive, SPAPR_XIVE_BLOCK_ID, i,
                                     &xive->endt[i], &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    }
}

/*
 * The primary goal of the XIVE VM change handler is to mark the EQ
 * pages dirty when all XIVE event notifications have stopped.
 *
 * Whenever the VM is stopped, the VM change handler sets the source
 * PQs to PENDING to stop the flow of events and to possibly catch a
 * triggered interrupt occurring while the VM is stopped. The previous
 * state is saved in anticipation of a migration. The XIVE controller
 * is then synced through KVM to flush any in-flight event
 * notifications and stabilize the EQs.
 *
 * At this stage, we can mark the EQ page dirty and let a migration
 * sequence transfer the EQ pages to the destination, which is done
 * just after the stop state.
 *
 * The previous configuration of the sources is restored when the VM
 * runs again. If an interrupt was queued while the VM was stopped,
 * simply generate a trigger.
 */
static void kvmppc_xive_change_state_handler(void *opaque, int running,
                                             RunState state)
{
    SpaprXive *xive = opaque;
    XiveSource *xsrc = &xive->source;
    Error *local_err = NULL;
    int i;

    /*
     * Restore the sources to their initial state. This is called when
     * the VM resumes after a stop or a migration.
     */
    if (running) {
        for (i = 0; i < xsrc->nr_irqs; i++) {
            uint8_t pq;
            uint8_t old_pq;

            if (!xive_eas_is_valid(&xive->eat[i])) {
                continue;
            }

            pq = xive_source_esb_get(xsrc, i);
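            /*
             * Restore the saved PQ bits with a load at the matching
             * XIVE_ESB_SET_PQ_* offset: the four offsets are 0x100
             * apart, hence the (pq << 8).
             */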
            old_pq = xive_esb_read(xsrc, i, XIVE_ESB_SET_PQ_00 + (pq << 8));

            /*
             * An interrupt was queued while the VM was stopped,
             * generate a trigger.
             */
            if (pq == XIVE_ESB_RESET && old_pq == XIVE_ESB_QUEUED) {
                xive_esb_trigger(xsrc, i);
            }
        }

        return;
    }

    /*
     * Mask the sources, to stop the flow of event notifications, and
     * save the PQs locally in the XiveSource object. The XiveSource
     * state will be collected later on by its vmstate handler if a
     * migration is in progress.
     */
    for (i = 0; i < xsrc->nr_irqs; i++) {
        uint8_t pq;

        if (!xive_eas_is_valid(&xive->eat[i])) {
            continue;
        }

        pq = xive_esb_read(xsrc, i, XIVE_ESB_GET);

        /*
         * PQ is set to PENDING to possibly catch a triggered
         * interrupt occurring while the VM is stopped (a hotplug
         * event, for instance).
         */
        if (pq != XIVE_ESB_OFF) {
            pq = xive_esb_read(xsrc, i, XIVE_ESB_SET_PQ_10);
        }
        xive_source_esb_set(xsrc, i, pq);
    }

    /*
     * Sync the XIVE controller in KVM, to flush in-flight event
     * notifications that should be enqueued in the EQs, and mark the
     * XIVE EQ pages dirty to collect all updates.
     */
    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_CTRL,
                      KVM_DEV_XIVE_EQ_SYNC, NULL, true, &local_err);
    if (local_err) {
        error_report_err(local_err);
        return;
    }
}

void kvmppc_xive_synchronize_state(SpaprXive *xive, Error **errp)
{
    /* The KVM XIVE device is not in use */
    if (xive->fd == -1) {
        return;
    }

    /*
     * When the VM is stopped, the sources are masked and the previous
     * state is saved in anticipation of a migration. We should not
     * synchronize the source state in that case else we will override
     * the saved state.
     */
    if (runstate_is_running()) {
        kvmppc_xive_source_get_state(&xive->source);
    }

    /* EAT: there is no extra state to query from KVM */

    /* ENDT */
    kvmppc_xive_get_queues(xive, errp);
}

/*
 * The SpaprXive 'pre_save' method is called by the vmstate handler of
 * the SpaprXive model, after the XIVE controller is synced in the VM
 * change handler.
 */
int kvmppc_xive_pre_save(SpaprXive *xive)
{
    Error *local_err = NULL;

    /* The KVM XIVE device is not in use */
    if (xive->fd == -1) {
        return 0;
    }

    /* EAT: there is no extra state to query from KVM */

    /* ENDT */
    kvmppc_xive_get_queues(xive, &local_err);
    if (local_err) {
        error_report_err(local_err);
        return -1;
    }

    return 0;
}

/*
 * The SpaprXive 'post_load' method is not called by a vmstate
 * handler. It is called at the sPAPR machine level at the end of the
 * migration sequence by the sPAPR IRQ backend 'post_load' method,
 * when all XIVE states have been transferred and loaded.
 */
int kvmppc_xive_post_load(SpaprXive *xive, int version_id)
{
    Error *local_err = NULL;
    CPUState *cs;
    int i;

    /* The KVM XIVE device should be in use */
    assert(xive->fd != -1);

    /* Restore the ENDT first. The targeting depends on it. */
    for (i = 0; i < xive->nr_ends; i++) {
        if (!xive_end_is_valid(&xive->endt[i])) {
            continue;
        }

        kvmppc_xive_set_queue_config(xive, SPAPR_XIVE_BLOCK_ID, i,
                                     &xive->endt[i], &local_err);
        if (local_err) {
            error_report_err(local_err);
            return -1;
        }
    }

    /* Restore the EAT */
    for (i = 0; i < xive->nr_irqs; i++) {
        if (!xive_eas_is_valid(&xive->eat[i])) {
            continue;
        }

        /*
         * We can only restore the source config if the source has been
         * previously set in KVM. Since we don't do that for all interrupts
         * at reset time anymore, let's do it now.
         */
        kvmppc_xive_source_reset_one(&xive->source, i, &local_err);
        if (local_err) {
            error_report_err(local_err);
            return -1;
        }

        kvmppc_xive_set_source_config(xive, i, &xive->eat[i], &local_err);
        if (local_err) {
            error_report_err(local_err);
            return -1;
        }
    }

    /*
     * Restore the thread interrupt contexts of initial CPUs.
     *
     * The context of hotplugged CPUs is restored later, by the
     * 'post_load' handler of the XiveTCTX model because they are not
     * available at the time the SpaprXive 'post_load' method is
     * called. We cannot restore the context of all CPUs in the
     * 'post_load' handler of XiveTCTX because the machine is not
     * necessarily connected to the KVM device at that time.
     */
    CPU_FOREACH(cs) {
        PowerPCCPU *cpu = POWERPC_CPU(cs);

        kvmppc_xive_cpu_set_state(spapr_cpu_state(cpu)->tctx, &local_err);
        if (local_err) {
            error_report_err(local_err);
            return -1;
        }
    }

    /* The source states will be restored when the machine starts running */
    return 0;
}

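/*
 * Mmap a range of the KVM XIVE device fd. 'pgoff' is an offset in
 * pages within the device mapping space, 64K pages for now, hence
 * the hardcoded page_shift of 16.
 */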
static void *kvmppc_xive_mmap(SpaprXive *xive, int pgoff, size_t len,
                              Error **errp)
{
    void *addr;
    uint32_t page_shift = 16; /* TODO: fix page_shift */

    addr = mmap(NULL, len, PROT_WRITE | PROT_READ, MAP_SHARED, xive->fd,
                pgoff << page_shift);
    if (addr == MAP_FAILED) {
        error_setg_errno(errp, errno, "XIVE: unable to set memory mapping");
        return NULL;
    }

    return addr;
}

/*
 * All the XIVE memory regions are now backed by mappings from the KVM
 * XIVE device.
 */
int kvmppc_xive_connect(SpaprInterruptController *intc, uint32_t nr_servers,
                        Error **errp)
{
    SpaprXive *xive = SPAPR_XIVE(intc);
    XiveSource *xsrc = &xive->source;
    Error *local_err = NULL;
    size_t esb_len = (1ull << xsrc->esb_shift) * xsrc->nr_irqs;
    size_t tima_len = 4ull << TM_SHIFT;
    CPUState *cs;

    /*
     * The KVM XIVE device is already in use. This is the case when
     * rebooting under the XIVE-only interrupt mode.
     */
    if (xive->fd != -1) {
        return 0;
    }

    if (!kvmppc_has_cap_xive()) {
        error_setg(errp, "IRQ_XIVE capability must be present for KVM");
        return -1;
    }

    /* First, create the KVM XIVE device */
    xive->fd = kvm_create_device(kvm_state, KVM_DEV_TYPE_XIVE, false);
    if (xive->fd < 0) {
        error_setg_errno(errp, -xive->fd, "XIVE: error creating KVM device");
        return -1;
    }

    /* Tell KVM about the # of VCPUs we may have */
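    /* The attribute may be absent on older kernels, hence the check */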
    if (kvm_device_check_attr(xive->fd, KVM_DEV_XIVE_GRP_CTRL,
                              KVM_DEV_XIVE_NR_SERVERS)) {
        if (kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_CTRL,
                              KVM_DEV_XIVE_NR_SERVERS, &nr_servers, true,
                              &local_err)) {
            goto fail;
        }
    }

    /*
     * 1. Source ESB pages - KVM mapping
     */
    xsrc->esb_mmap = kvmppc_xive_mmap(xive, KVM_XIVE_ESB_PAGE_OFFSET, esb_len,
                                      &local_err);
    if (local_err) {
        goto fail;
    }

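    /*
     * Overlay the KVM-backed region on the emulated ESB MMIO region
     * with a higher priority (1) so that guest accesses reach the
     * KVM mapping first.
     */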
    memory_region_init_ram_device_ptr(&xsrc->esb_mmio_kvm, OBJECT(xsrc),
                                      "xive.esb", esb_len, xsrc->esb_mmap);
    memory_region_add_subregion_overlap(&xsrc->esb_mmio, 0,
                                        &xsrc->esb_mmio_kvm, 1);

    /*
     * 2. END ESB pages (No KVM support yet)
     */

    /*
     * 3. TIMA pages - KVM mapping
     */
    xive->tm_mmap = kvmppc_xive_mmap(xive, KVM_XIVE_TIMA_PAGE_OFFSET, tima_len,
                                     &local_err);
    if (local_err) {
        goto fail;
    }
    memory_region_init_ram_device_ptr(&xive->tm_mmio_kvm, OBJECT(xive),
                                      "xive.tima", tima_len, xive->tm_mmap);
    memory_region_add_subregion_overlap(&xive->tm_mmio, 0,
                                        &xive->tm_mmio_kvm, 1);

    xive->change = qemu_add_vm_change_state_handler(
        kvmppc_xive_change_state_handler, xive);

    /* Connect the presenters to the initial VCPUs of the machine */
    CPU_FOREACH(cs) {
        PowerPCCPU *cpu = POWERPC_CPU(cs);

        kvmppc_xive_cpu_connect(spapr_cpu_state(cpu)->tctx, &local_err);
        if (local_err) {
            goto fail;
        }
    }

    /* Update the KVM sources */
    kvmppc_xive_source_reset(xsrc, &local_err);
    if (local_err) {
        goto fail;
    }

    kvm_kernel_irqchip = true;
    kvm_msi_via_irqfd_allowed = true;
    kvm_gsi_direct_mapping = true;
    return 0;

fail:
    error_propagate(errp, local_err);
    kvmppc_xive_disconnect(intc);
    return -1;
}

void kvmppc_xive_disconnect(SpaprInterruptController *intc)
{
    SpaprXive *xive = SPAPR_XIVE(intc);
    XiveSource *xsrc;
    size_t esb_len;

    /* The KVM XIVE device is not in use */
    if (!xive || xive->fd == -1) {
        return;
    }

    /* Clear the KVM mapping */
    xsrc = &xive->source;
    esb_len = (1ull << xsrc->esb_shift) * xsrc->nr_irqs;

    if (xsrc->esb_mmap) {
        memory_region_del_subregion(&xsrc->esb_mmio, &xsrc->esb_mmio_kvm);
        object_unparent(OBJECT(&xsrc->esb_mmio_kvm));
        munmap(xsrc->esb_mmap, esb_len);
        xsrc->esb_mmap = NULL;
    }

    if (xive->tm_mmap) {
        memory_region_del_subregion(&xive->tm_mmio, &xive->tm_mmio_kvm);
        object_unparent(OBJECT(&xive->tm_mmio_kvm));
        munmap(xive->tm_mmap, 4ull << TM_SHIFT);
        xive->tm_mmap = NULL;
    }

    /*
     * When the KVM device fd is closed, the KVM device is destroyed
     * and removed from the list of devices of the VM. The VCPU
     * presenters are also detached from the device.
     */
    if (xive->fd != -1) {
        close(xive->fd);
        xive->fd = -1;
    }

    kvm_kernel_irqchip = false;
    kvm_msi_via_irqfd_allowed = false;
    kvm_gsi_direct_mapping = false;

    /* Clear the local list of presenters (hotplug) */
    kvm_cpu_disable_all();

    /* VM Change state handler is not needed anymore */
    if (xive->change) {
        qemu_del_vm_change_state_handler(xive->change);
        xive->change = NULL;
    }
}