/*
 * kvm eventfd support - use eventfd objects to signal various KVM events
 *
 * Copyright 2009 Novell.  All Rights Reserved.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Author:
 *	Gregory Haskins <ghaskins@novell.com>
 *
 * This file is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/kvm_irqfd.h>
#include <linux/workqueue.h>
#include <linux/syscalls.h>
#include <linux/wait.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/list.h>
#include <linux/eventfd.h>
#include <linux/kernel.h>
#include <linux/srcu.h>
#include <linux/slab.h>
#include <linux/seqlock.h>
#include <linux/irqbypass.h>
#include <trace/events/kvm.h>

#include <kvm/iodev.h>

#ifdef CONFIG_HAVE_KVM_IRQFD

static struct workqueue_struct *irqfd_cleanup_wq;

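/*
 * Weak arch hook: lets an architecture veto irqfd setup for a given
 * configuration.  The default permits everything.
 */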
bool __attribute__((weak))
kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args)
{
	return true;
}

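/*
 * Workqueue handler that performs the actual injection.  Without a
 * resampler the GSI is pulsed (asserted then immediately de-asserted);
 * with one it is only asserted here and de-asserted from
 * irqfd_resampler_ack() once the guest acknowledges the interrupt.
 */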
static void
irqfd_inject(struct work_struct *work)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(work, struct kvm_kernel_irqfd, inject);
	struct kvm *kvm = irqfd->kvm;

	if (!irqfd->resampler) {
		kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1,
			    false);
		kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0,
			    false);
	} else
		kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
			    irqfd->gsi, 1, false);
}

/*
 * Since resampler irqfds share an IRQ source ID, we de-assert once
 * then notify all of the resampler irqfds using this GSI.  We can't
 * do multiple de-asserts or we risk racing with incoming re-asserts.
 */
static void
irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
{
	struct kvm_kernel_irqfd_resampler *resampler;
	struct kvm *kvm;
	struct kvm_kernel_irqfd *irqfd;
	int idx;

	resampler = container_of(kian,
			struct kvm_kernel_irqfd_resampler, notifier);
	kvm = resampler->kvm;

	kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
		    resampler->notifier.gsi, 0, false);

	idx = srcu_read_lock(&kvm->irq_srcu);

	list_for_each_entry_rcu(irqfd, &resampler->list, resampler_link)
		eventfd_signal(irqfd->resamplefd, 1);

	srcu_read_unlock(&kvm->irq_srcu, idx);
}

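/*
 * Unlink an irqfd from its resampler; the last irqfd on a GSI also
 * tears the resampler itself down.
 */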
static void
irqfd_resampler_shutdown(struct kvm_kernel_irqfd *irqfd)
{
	struct kvm_kernel_irqfd_resampler *resampler = irqfd->resampler;
	struct kvm *kvm = resampler->kvm;

	mutex_lock(&kvm->irqfds.resampler_lock);

	list_del_rcu(&irqfd->resampler_link);
	synchronize_srcu(&kvm->irq_srcu);

	if (list_empty(&resampler->list)) {
		list_del(&resampler->link);
		kvm_unregister_irq_ack_notifier(kvm, &resampler->notifier);
		kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
			    resampler->notifier.gsi, 0, false);
		kfree(resampler);
	}

	mutex_unlock(&kvm->irqfds.resampler_lock);
}

/*
 * Race-free decouple logic (ordering is critical)
 */
static void
irqfd_shutdown(struct work_struct *work)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(work, struct kvm_kernel_irqfd, shutdown);
	struct kvm *kvm = irqfd->kvm;
	u64 cnt;

	/* Make sure irqfd has been initialized in the assign path. */
	synchronize_srcu(&kvm->irq_srcu);

	/*
	 * Synchronize with the wait-queue and unhook ourselves to prevent
	 * further events.
	 */
	eventfd_ctx_remove_wait_queue(irqfd->eventfd, &irqfd->wait, &cnt);

	/*
	 * We know no new events will be scheduled at this point, so block
	 * until all previously outstanding events have completed.
	 */
	flush_work(&irqfd->inject);

	if (irqfd->resampler) {
		irqfd_resampler_shutdown(irqfd);
		eventfd_ctx_put(irqfd->resamplefd);
	}

	/*
	 * It is now safe to release the object's resources.
	 */
#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
	irq_bypass_unregister_consumer(&irqfd->consumer);
#endif
	eventfd_ctx_put(irqfd->eventfd);
	kfree(irqfd);
}

/* assumes kvm->irqfds.lock is held */
static bool
irqfd_is_active(struct kvm_kernel_irqfd *irqfd)
{
	return !list_empty(&irqfd->list);
}

/*
 * Mark the irqfd as inactive and schedule it for removal
 *
 * assumes kvm->irqfds.lock is held
 */
static void
irqfd_deactivate(struct kvm_kernel_irqfd *irqfd)
{
	BUG_ON(!irqfd_is_active(irqfd));

	list_del_init(&irqfd->list);

	queue_work(irqfd_cleanup_wq, &irqfd->shutdown);
}

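/*
 * Weak default for architectures that cannot inject from atomic
 * (wait-queue callback) context; returning -EWOULDBLOCK makes
 * irqfd_wakeup() fall back to the injection workqueue.
 */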
int __attribute__((weak)) kvm_arch_set_irq_inatomic(
				struct kvm_kernel_irq_routing_entry *irq,
				struct kvm *kvm, int irq_source_id,
				int level,
				bool line_status)
{
	return -EWOULDBLOCK;
}

/*
 * Called with wqh->lock held and interrupts disabled
 */
static int
irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(wait, struct kvm_kernel_irqfd, wait);
	unsigned long flags = (unsigned long)key;
	struct kvm_kernel_irq_routing_entry irq;
	struct kvm *kvm = irqfd->kvm;
	unsigned seq;
	int idx;

	if (flags & POLLIN) {
		idx = srcu_read_lock(&kvm->irq_srcu);
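		/*
		 * Snapshot the routing entry under the seqcount so we never
		 * inject from a half-updated entry; the writers are
		 * irqfd_update() and kvm_irqfd_deassign().
		 */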
		do {
			seq = read_seqcount_begin(&irqfd->irq_entry_sc);
			irq = irqfd->irq_entry;
		} while (read_seqcount_retry(&irqfd->irq_entry_sc, seq));
		/* An event has been signaled, inject an interrupt */
		if (kvm_arch_set_irq_inatomic(&irq, kvm,
					      KVM_USERSPACE_IRQ_SOURCE_ID, 1,
					      false) == -EWOULDBLOCK)
			schedule_work(&irqfd->inject);
		srcu_read_unlock(&kvm->irq_srcu, idx);
	}

	if (flags & POLLHUP) {
		/* The eventfd is closing, detach from KVM */
		unsigned long flags;

		spin_lock_irqsave(&kvm->irqfds.lock, flags);

		/*
		 * We must check if someone deactivated the irqfd before
		 * we could acquire the irqfds.lock since the item is
		 * deactivated from the KVM side before it is unhooked from
		 * the wait-queue.  If it is already deactivated, we can
		 * simply return knowing the other side will cleanup for us.
		 * We cannot race against the irqfd going away since the
		 * other side is required to acquire wqh->lock, which we hold.
		 */
		if (irqfd_is_active(irqfd))
			irqfd_deactivate(irqfd);

		spin_unlock_irqrestore(&kvm->irqfds.lock, flags);
	}

	return 0;
}

static void
irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
			poll_table *pt)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(pt, struct kvm_kernel_irqfd, pt);
	add_wait_queue(wqh, &irqfd->wait);
}

/* Must be called under irqfds.lock */
static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd)
{
	struct kvm_kernel_irq_routing_entry *e;
	struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
	int n_entries;

	n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi);

	write_seqcount_begin(&irqfd->irq_entry_sc);

	e = entries;
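	/*
	 * Only a GSI that resolves to exactly one routing entry can be
	 * cached for atomic injection; anything else is marked invalid
	 * (type == 0) so irqfd_wakeup() defers to the workqueue.
	 */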
	if (n_entries == 1)
		irqfd->irq_entry = *e;
	else
		irqfd->irq_entry.type = 0;

	write_seqcount_end(&irqfd->irq_entry_sc);
}

#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
void __attribute__((weak)) kvm_arch_irq_bypass_stop(
				struct irq_bypass_consumer *cons)
{
}

void __attribute__((weak)) kvm_arch_irq_bypass_start(
				struct irq_bypass_consumer *cons)
{
}

int __attribute__((weak)) kvm_arch_update_irqfd_routing(
				struct kvm *kvm, unsigned int host_irq,
				uint32_t guest_irq, bool set)
{
	return 0;
}
#endif

static int
kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
{
	struct kvm_kernel_irqfd *irqfd, *tmp;
	struct fd f;
	struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL;
	int ret;
	unsigned int events;
	int idx;

	if (!kvm_arch_intc_initialized(kvm))
		return -EAGAIN;

	if (!kvm_arch_irqfd_allowed(kvm, args))
		return -EINVAL;

	irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL);
	if (!irqfd)
		return -ENOMEM;

	irqfd->kvm = kvm;
	irqfd->gsi = args->gsi;
	INIT_LIST_HEAD(&irqfd->list);
	INIT_WORK(&irqfd->inject, irqfd_inject);
	INIT_WORK(&irqfd->shutdown, irqfd_shutdown);
	seqcount_init(&irqfd->irq_entry_sc);

	f = fdget(args->fd);
	if (!f.file) {
		ret = -EBADF;
		goto out;
	}

	eventfd = eventfd_ctx_fileget(f.file);
	if (IS_ERR(eventfd)) {
		ret = PTR_ERR(eventfd);
		goto fail;
	}

	irqfd->eventfd = eventfd;

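	/*
	 * Level-triggered (resample) irqfds on the same GSI share one
	 * kvm_kernel_irqfd_resampler, so reuse an existing one before
	 * allocating.
	 */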
	if (args->flags & KVM_IRQFD_FLAG_RESAMPLE) {
		struct kvm_kernel_irqfd_resampler *resampler;

		resamplefd = eventfd_ctx_fdget(args->resamplefd);
		if (IS_ERR(resamplefd)) {
			ret = PTR_ERR(resamplefd);
			goto fail;
		}

		irqfd->resamplefd = resamplefd;
		INIT_LIST_HEAD(&irqfd->resampler_link);

		mutex_lock(&kvm->irqfds.resampler_lock);

		list_for_each_entry(resampler,
				    &kvm->irqfds.resampler_list, link) {
			if (resampler->notifier.gsi == irqfd->gsi) {
				irqfd->resampler = resampler;
				break;
			}
		}

		if (!irqfd->resampler) {
			resampler = kzalloc(sizeof(*resampler), GFP_KERNEL);
			if (!resampler) {
				ret = -ENOMEM;
				mutex_unlock(&kvm->irqfds.resampler_lock);
				goto fail;
			}

			resampler->kvm = kvm;
			INIT_LIST_HEAD(&resampler->list);
			resampler->notifier.gsi = irqfd->gsi;
			resampler->notifier.irq_acked = irqfd_resampler_ack;
			INIT_LIST_HEAD(&resampler->link);

			list_add(&resampler->link, &kvm->irqfds.resampler_list);
			kvm_register_irq_ack_notifier(kvm,
						      &resampler->notifier);
			irqfd->resampler = resampler;
		}

		list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list);
		synchronize_srcu(&kvm->irq_srcu);

		mutex_unlock(&kvm->irqfds.resampler_lock);
	}

	/*
	 * Install our own custom wake-up handling so we are notified via
	 * a callback whenever someone signals the underlying eventfd
	 */
	init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup);
	init_poll_funcptr(&irqfd->pt, irqfd_ptable_queue_proc);

	spin_lock_irq(&kvm->irqfds.lock);

	ret = 0;
	list_for_each_entry(tmp, &kvm->irqfds.items, list) {
		if (irqfd->eventfd != tmp->eventfd)
			continue;
		/* This fd is used for another irq already. */
		ret = -EBUSY;
		spin_unlock_irq(&kvm->irqfds.lock);
		goto fail;
	}

	idx = srcu_read_lock(&kvm->irq_srcu);
	irqfd_update(kvm, irqfd);

	list_add_tail(&irqfd->list, &kvm->irqfds.items);

	spin_unlock_irq(&kvm->irqfds.lock);

	/*
	 * Check if there was an event already pending on the eventfd
	 * before we registered, and trigger it as if we didn't miss it.
	 */
	events = f.file->f_op->poll(f.file, &irqfd->pt);

	if (events & POLLIN)
		schedule_work(&irqfd->inject);

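	/*
	 * Opportunistically register as an IRQ bypass consumer, keyed on
	 * the eventfd context, so a bypass producer (e.g. VFIO) signaling
	 * the same eventfd can be wired directly to the guest.
	 */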
#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
	if (kvm_arch_has_irq_bypass()) {
		irqfd->consumer.token = (void *)irqfd->eventfd;
		irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer;
		irqfd->consumer.del_producer = kvm_arch_irq_bypass_del_producer;
		irqfd->consumer.stop = kvm_arch_irq_bypass_stop;
		irqfd->consumer.start = kvm_arch_irq_bypass_start;
		ret = irq_bypass_register_consumer(&irqfd->consumer);
		if (ret)
			pr_info("irq bypass consumer (token %p) registration fails: %d\n",
				irqfd->consumer.token, ret);
	}
#endif

	srcu_read_unlock(&kvm->irq_srcu, idx);

	/*
	 * Do not drop the file until the irqfd is fully initialized;
	 * otherwise we might race against the POLLHUP.
	 */
	fdput(f);
	return 0;

fail:
	if (irqfd->resampler)
		irqfd_resampler_shutdown(irqfd);

	if (resamplefd && !IS_ERR(resamplefd))
		eventfd_ctx_put(resamplefd);

	if (eventfd && !IS_ERR(eventfd))
		eventfd_ctx_put(eventfd);

	fdput(f);

out:
	kfree(irqfd);
	return ret;
}

bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
	struct kvm_irq_ack_notifier *kian;
	int gsi, idx;

	idx = srcu_read_lock(&kvm->irq_srcu);
	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
	if (gsi != -1)
		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
					 link)
			if (kian->gsi == gsi) {
				srcu_read_unlock(&kvm->irq_srcu, idx);
				return true;
			}

	srcu_read_unlock(&kvm->irq_srcu, idx);

	return false;
}
EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);

void kvm_notify_acked_gsi(struct kvm *kvm, int gsi)
{
	struct kvm_irq_ack_notifier *kian;

	hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
				 link)
		if (kian->gsi == gsi)
			kian->irq_acked(kian);
}

void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
	int gsi, idx;

	trace_kvm_ack_irq(irqchip, pin);

	idx = srcu_read_lock(&kvm->irq_srcu);
	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
	if (gsi != -1)
		kvm_notify_acked_gsi(kvm, gsi);
	srcu_read_unlock(&kvm->irq_srcu, idx);
}

void kvm_register_irq_ack_notifier(struct kvm *kvm,
				   struct kvm_irq_ack_notifier *kian)
{
	mutex_lock(&kvm->irq_lock);
	hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
	mutex_unlock(&kvm->irq_lock);
	kvm_arch_post_irq_ack_notifier_list_update(kvm);
}

void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
				     struct kvm_irq_ack_notifier *kian)
{
	mutex_lock(&kvm->irq_lock);
	hlist_del_init_rcu(&kian->link);
	mutex_unlock(&kvm->irq_lock);
	synchronize_srcu(&kvm->irq_srcu);
	kvm_arch_post_irq_ack_notifier_list_update(kvm);
}
#endif

void
kvm_eventfd_init(struct kvm *kvm)
{
#ifdef CONFIG_HAVE_KVM_IRQFD
	spin_lock_init(&kvm->irqfds.lock);
	INIT_LIST_HEAD(&kvm->irqfds.items);
	INIT_LIST_HEAD(&kvm->irqfds.resampler_list);
	mutex_init(&kvm->irqfds.resampler_lock);
#endif
	INIT_LIST_HEAD(&kvm->ioeventfds);
}

#ifdef CONFIG_HAVE_KVM_IRQFD
/*
 * shutdown any irqfd's that match fd+gsi
 */
static int
kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
{
	struct kvm_kernel_irqfd *irqfd, *tmp;
	struct eventfd_ctx *eventfd;

	eventfd = eventfd_ctx_fdget(args->fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	spin_lock_irq(&kvm->irqfds.lock);

	list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
		if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) {
			/*
			 * This clearing of irq_entry.type is needed for when
			 * another thread calls kvm_irq_routing_update before
			 * we flush the workqueue below (we synchronize with
			 * kvm_irq_routing_update using irqfds.lock).
			 */
			write_seqcount_begin(&irqfd->irq_entry_sc);
			irqfd->irq_entry.type = 0;
			write_seqcount_end(&irqfd->irq_entry_sc);
			irqfd_deactivate(irqfd);
		}
	}

	spin_unlock_irq(&kvm->irqfds.lock);
	eventfd_ctx_put(eventfd);

	/*
	 * Block until we know all outstanding shutdown jobs have completed
	 * so that we guarantee there will not be any more interrupts on this
	 * gsi once this deassign function returns.
	 */
	flush_workqueue(irqfd_cleanup_wq);

	return 0;
}

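/*
 * Illustrative userspace sketch (not part of this file): once assigned,
 * signaling the eventfd injects an edge on the chosen GSI without a
 * trip through userspace.
 *
 *	struct kvm_irqfd irqfd = { .fd = efd, .gsi = gsi };
 *	ioctl(vm_fd, KVM_IRQFD, &irqfd);
 *	...
 *	irqfd.flags = KVM_IRQFD_FLAG_DEASSIGN;
 *	ioctl(vm_fd, KVM_IRQFD, &irqfd);
 */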
int
kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
{
	if (args->flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_RESAMPLE))
		return -EINVAL;

	if (args->flags & KVM_IRQFD_FLAG_DEASSIGN)
		return kvm_irqfd_deassign(kvm, args);

	return kvm_irqfd_assign(kvm, args);
}

/*
 * This function is called as the kvm VM fd is being released.  Shutdown all
 * irqfds that still remain open.
 */
void
kvm_irqfd_release(struct kvm *kvm)
{
	struct kvm_kernel_irqfd *irqfd, *tmp;

	spin_lock_irq(&kvm->irqfds.lock);

	list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list)
		irqfd_deactivate(irqfd);

	spin_unlock_irq(&kvm->irqfds.lock);

	/*
	 * Block until we know all outstanding shutdown jobs have completed
	 * since we do not take a kvm* reference.
	 */
	flush_workqueue(irqfd_cleanup_wq);
}

/*
 * Take note of a change in irq routing.
 * Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards.
 */
void kvm_irq_routing_update(struct kvm *kvm)
{
	struct kvm_kernel_irqfd *irqfd;

	spin_lock_irq(&kvm->irqfds.lock);

	list_for_each_entry(irqfd, &kvm->irqfds.items, list) {
		irqfd_update(kvm, irqfd);

#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
		if (irqfd->producer) {
			int ret = kvm_arch_update_irqfd_routing(
					irqfd->kvm, irqfd->producer->irq,
					irqfd->gsi, 1);
			WARN_ON(ret);
		}
#endif
	}

	spin_unlock_irq(&kvm->irqfds.lock);
}

/*
 * create a host-wide workqueue for issuing deferred shutdown requests
 * aggregated from all vm* instances.  We need our own isolated
 * queue to ease flushing work items when a VM exits.
 */
int kvm_irqfd_init(void)
{
	irqfd_cleanup_wq = alloc_workqueue("kvm-irqfd-cleanup", 0, 0);
	if (!irqfd_cleanup_wq)
		return -ENOMEM;

	return 0;
}

void kvm_irqfd_exit(void)
{
	destroy_workqueue(irqfd_cleanup_wq);
}
#endif

/*
 * --------------------------------------------------------------------
 * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal.
 *
 * userspace can register a PIO/MMIO address with an eventfd for receiving
 * notification when the memory has been touched.
 * --------------------------------------------------------------------
 */

struct _ioeventfd {
	struct list_head     list;
	u64                  addr;
	int                  length;
	struct eventfd_ctx  *eventfd;
	u64                  datamatch;
	struct kvm_io_device dev;
	u8                   bus_idx;
	bool                 wildcard;
};

static inline struct _ioeventfd *
to_ioeventfd(struct kvm_io_device *dev)
{
	return container_of(dev, struct _ioeventfd, dev);
}

static void
ioeventfd_release(struct _ioeventfd *p)
{
	eventfd_ctx_put(p->eventfd);
	list_del(&p->list);
	kfree(p);
}
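
/*
 * Matching rules: a zero-length registration matches on address alone;
 * otherwise the address and access width must both match, and a
 * non-wildcard (datamatch) entry additionally compares the written value.
 */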
static bool
ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val)
{
	u64 _val;

	if (addr != p->addr)
		/* address must be precise for a hit */
		return false;

	if (!p->length)
		/* length = 0 means only look at the address, so always a hit */
		return true;

	if (len != p->length)
		/* address-range must be precise for a hit */
		return false;

	if (p->wildcard)
		/* all else equal, wildcard is always a hit */
		return true;

	/* otherwise, we have to actually compare the data */

	BUG_ON(!IS_ALIGNED((unsigned long)val, len));

	switch (len) {
	case 1:
		_val = *(u8 *)val;
		break;
	case 2:
		_val = *(u16 *)val;
		break;
	case 4:
		_val = *(u32 *)val;
		break;
	case 8:
		_val = *(u64 *)val;
		break;
	default:
		return false;
	}

	return _val == p->datamatch;
}

/* MMIO/PIO writes trigger an event if the addr/val match */
static int
ioeventfd_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr,
		int len, const void *val)
{
	struct _ioeventfd *p = to_ioeventfd(this);

	if (!ioeventfd_in_range(p, addr, len, val))
		return -EOPNOTSUPP;

	eventfd_signal(p->eventfd, 1);
	return 0;
}

/*
 * This function is called as KVM is completely shutting down.  We do not
 * need to worry about locking; just nuke anything we have as quickly as
 * possible.
 */
static void
ioeventfd_destructor(struct kvm_io_device *this)
{
	struct _ioeventfd *p = to_ioeventfd(this);

	ioeventfd_release(p);
}

static const struct kvm_io_device_ops ioeventfd_ops = {
	.write      = ioeventfd_write,
	.destructor = ioeventfd_destructor,
};

/* assumes kvm->slots_lock held */
static bool
ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p)
{
	struct _ioeventfd *_p;

	list_for_each_entry(_p, &kvm->ioeventfds, list)
		if (_p->bus_idx == p->bus_idx &&
		    _p->addr == p->addr &&
		    (!_p->length || !p->length ||
		     (_p->length == p->length &&
		      (_p->wildcard || p->wildcard ||
		       _p->datamatch == p->datamatch))))
			return true;

	return false;
}

static enum kvm_bus ioeventfd_bus_from_flags(__u32 flags)
{
	if (flags & KVM_IOEVENTFD_FLAG_PIO)
		return KVM_PIO_BUS;
	if (flags & KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY)
		return KVM_VIRTIO_CCW_NOTIFY_BUS;
	return KVM_MMIO_BUS;
}

static int kvm_assign_ioeventfd_idx(struct kvm *kvm,
				enum kvm_bus bus_idx,
				struct kvm_ioeventfd *args)
{
	struct eventfd_ctx *eventfd;
	struct _ioeventfd *p;
	int ret;

	eventfd = eventfd_ctx_fdget(args->fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	p = kzalloc(sizeof(*p), GFP_KERNEL);
	if (!p) {
		ret = -ENOMEM;
		goto fail;
	}

	INIT_LIST_HEAD(&p->list);
	p->addr    = args->addr;
	p->bus_idx = bus_idx;
	p->length  = args->len;
	p->eventfd = eventfd;

	/* The datamatch feature is optional, otherwise this is a wildcard */
	if (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH)
		p->datamatch = args->datamatch;
	else
		p->wildcard = true;

	mutex_lock(&kvm->slots_lock);

	/* Verify that there isn't a match already */
	if (ioeventfd_check_collision(kvm, p)) {
		ret = -EEXIST;
		goto unlock_fail;
	}

	kvm_iodevice_init(&p->dev, &ioeventfd_ops);

	ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr, p->length,
				      &p->dev);
	if (ret < 0)
		goto unlock_fail;

	kvm_get_bus(kvm, bus_idx)->ioeventfd_count++;
	list_add_tail(&p->list, &kvm->ioeventfds);

	mutex_unlock(&kvm->slots_lock);

	return 0;

unlock_fail:
	mutex_unlock(&kvm->slots_lock);

fail:
	kfree(p);
	eventfd_ctx_put(eventfd);

	return ret;
}

static int
kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
			   struct kvm_ioeventfd *args)
{
	struct _ioeventfd *p, *tmp;
	struct eventfd_ctx *eventfd;
	struct kvm_io_bus *bus;
	int ret = -ENOENT;

	eventfd = eventfd_ctx_fdget(args->fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	mutex_lock(&kvm->slots_lock);

	list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) {
		bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH);

		if (p->bus_idx != bus_idx ||
		    p->eventfd != eventfd ||
		    p->addr != args->addr ||
		    p->length != args->len ||
		    p->wildcard != wildcard)
			continue;

		if (!p->wildcard && p->datamatch != args->datamatch)
			continue;

		kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
		bus = kvm_get_bus(kvm, bus_idx);
		if (bus)
			bus->ioeventfd_count--;
		ioeventfd_release(p);
		ret = 0;
		break;
	}

	mutex_unlock(&kvm->slots_lock);

	eventfd_ctx_put(eventfd);

	return ret;
}

static int kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
	enum kvm_bus bus_idx = ioeventfd_bus_from_flags(args->flags);
	int ret = kvm_deassign_ioeventfd_idx(kvm, bus_idx, args);

	if (!args->len && bus_idx == KVM_MMIO_BUS)
		kvm_deassign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args);

	return ret;
}

static int
kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
	enum kvm_bus bus_idx;
	int ret;

	bus_idx = ioeventfd_bus_from_flags(args->flags);
	/* must be natural-word sized, or 0 to ignore length */
	switch (args->len) {
	case 0:
	case 1:
	case 2:
	case 4:
	case 8:
		break;
	default:
		return -EINVAL;
	}

	/* check for range overflow */
	if (args->addr + args->len < args->addr)
		return -EINVAL;

	/* check for extra flags that we don't understand */
	if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
		return -EINVAL;

	/* ioeventfd with no length can't be combined with DATAMATCH */
	if (!args->len && (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH))
		return -EINVAL;

	ret = kvm_assign_ioeventfd_idx(kvm, bus_idx, args);
	if (ret)
		goto fail;

	/*
	 * When length is ignored, MMIO is also put on a separate bus, for
	 * faster lookups.
	 */
	if (!args->len && bus_idx == KVM_MMIO_BUS) {
		ret = kvm_assign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args);
		if (ret < 0)
			goto fast_fail;
	}

	return 0;

fast_fail:
	kvm_deassign_ioeventfd_idx(kvm, bus_idx, args);
fail:
	return ret;
}

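/*
 * Illustrative userspace sketch (not part of this file): after this
 * registration, a 2-byte guest write of 0x42 to @addr signals the
 * eventfd instead of exiting to userspace.
 *
 *	struct kvm_ioeventfd ioefd = {
 *		.datamatch = 0x42,
 *		.addr      = addr,
 *		.len       = 2,
 *		.fd        = efd,
 *		.flags     = KVM_IOEVENTFD_FLAG_DATAMATCH,
 *	};
 *	ioctl(vm_fd, KVM_IOEVENTFD, &ioefd);
 */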
int
kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
	if (args->flags & KVM_IOEVENTFD_FLAG_DEASSIGN)
		return kvm_deassign_ioeventfd(kvm, args);

	return kvm_assign_ioeventfd(kvm, args);
}