]>
Commit | Line | Data |
---|---|---|
/*
 * Virtio PCI driver
 *
 * This module allows virtio devices to be used over a virtual PCI device.
 * This can be used with QEMU based VMMs like KVM or Xen.
 *
 * Copyright IBM Corp. 2007
 *
 * Authors:
 *  Anthony Liguori  <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */
16 | ||
38eb4a29 | 17 | #include "virtio_pci_legacy.c" |
3343660d | 18 | |
e6af578c | 19 | /* wait for pending irq handlers */ |
38eb4a29 | 20 | void vp_synchronize_vectors(struct virtio_device *vdev) |
e6af578c MT |
21 | { |
22 | struct virtio_pci_device *vp_dev = to_vp_device(vdev); | |
23 | int i; | |
24 | ||
25 | if (vp_dev->intx_enabled) | |
26 | synchronize_irq(vp_dev->pci_dev->irq); | |
27 | ||
28 | for (i = 0; i < vp_dev->msix_vectors; ++i) | |
29 | synchronize_irq(vp_dev->msix_entries[i].vector); | |
30 | } | |
31 | ||
3343660d | 32 | /* the notify function used when creating a virt queue */ |
38eb4a29 | 33 | bool vp_notify(struct virtqueue *vq) |
3343660d | 34 | { |
3343660d AL |
35 | /* we write the queue's selector into the notification register to |
36 | * signal the other end */ | |
f30eaf4a | 37 | iowrite16(vq->index, (void __iomem *)vq->priv); |
46f9c2b9 | 38 | return true; |
3343660d AL |
39 | } |
40 | ||
77cf5246 MT |
41 | /* Handle a configuration change: Tell driver if it wants to know. */ |
42 | static irqreturn_t vp_config_changed(int irq, void *opaque) | |
43 | { | |
44 | struct virtio_pci_device *vp_dev = opaque; | |
77cf5246 | 45 | |
016c98c6 | 46 | virtio_config_changed(&vp_dev->vdev); |
77cf5246 MT |
47 | return IRQ_HANDLED; |
48 | } | |
49 | ||
50 | /* Notify all virtqueues on an interrupt. */ | |
51 | static irqreturn_t vp_vring_interrupt(int irq, void *opaque) | |
52 | { | |
53 | struct virtio_pci_device *vp_dev = opaque; | |
54 | struct virtio_pci_vq_info *info; | |
55 | irqreturn_t ret = IRQ_NONE; | |
56 | unsigned long flags; | |
57 | ||
58 | spin_lock_irqsave(&vp_dev->lock, flags); | |
59 | list_for_each_entry(info, &vp_dev->virtqueues, node) { | |
60 | if (vring_interrupt(irq, info->vq) == IRQ_HANDLED) | |
61 | ret = IRQ_HANDLED; | |
62 | } | |
63 | spin_unlock_irqrestore(&vp_dev->lock, flags); | |
64 | ||
65 | return ret; | |
66 | } | |
67 | ||
3343660d AL |
68 | /* A small wrapper to also acknowledge the interrupt when it's handled. |
69 | * I really need an EIO hook for the vring so I can ack the interrupt once we | |
70 | * know that we'll be handling the IRQ but before we invoke the callback since | |
71 | * the callback may notify the host which results in the host attempting to | |
72 | * raise an interrupt that we would then mask once we acknowledged the | |
73 | * interrupt. */ | |
74 | static irqreturn_t vp_interrupt(int irq, void *opaque) | |
75 | { | |
76 | struct virtio_pci_device *vp_dev = opaque; | |
3343660d AL |
77 | u8 isr; |
78 | ||
79 | /* reading the ISR has the effect of also clearing it so it's very | |
80 | * important to save off the value. */ | |
af535722 | 81 | isr = ioread8(vp_dev->isr); |
3343660d AL |
82 | |
83 | /* It's definitely not us if the ISR was not high */ | |
84 | if (!isr) | |
85 | return IRQ_NONE; | |
86 | ||
87 | /* Configuration change? Tell driver if it wants to know. */ | |
77cf5246 MT |
88 | if (isr & VIRTIO_PCI_ISR_CONFIG) |
89 | vp_config_changed(irq, opaque); | |
3343660d | 90 | |
77cf5246 | 91 | return vp_vring_interrupt(irq, opaque); |
3343660d AL |
92 | } |
93 | ||
82af8ce8 MT |
94 | static void vp_free_vectors(struct virtio_device *vdev) |
95 | { | |
96 | struct virtio_pci_device *vp_dev = to_vp_device(vdev); | |
97 | int i; | |
98 | ||
99 | if (vp_dev->intx_enabled) { | |
100 | free_irq(vp_dev->pci_dev->irq, vp_dev); | |
101 | vp_dev->intx_enabled = 0; | |
102 | } | |
103 | ||
104 | for (i = 0; i < vp_dev->msix_used_vectors; ++i) | |
105 | free_irq(vp_dev->msix_entries[i].vector, vp_dev); | |
82af8ce8 | 106 | |
75a0a52b JW |
107 | for (i = 0; i < vp_dev->msix_vectors; i++) |
108 | if (vp_dev->msix_affinity_masks[i]) | |
109 | free_cpumask_var(vp_dev->msix_affinity_masks[i]); | |
110 | ||
82af8ce8 MT |
111 | if (vp_dev->msix_enabled) { |
112 | /* Disable the vector used for configuration */ | |
6f8f23d6 | 113 | vp_dev->config_vector(vp_dev, VIRTIO_MSI_NO_VECTOR); |
82af8ce8 | 114 | |
82af8ce8 | 115 | pci_disable_msix(vp_dev->pci_dev); |
ff52c3fc | 116 | vp_dev->msix_enabled = 0; |
82af8ce8 | 117 | } |
ff52c3fc | 118 | |
f11335db | 119 | vp_dev->msix_vectors = 0; |
ff52c3fc MT |
120 | vp_dev->msix_used_vectors = 0; |
121 | kfree(vp_dev->msix_names); | |
122 | vp_dev->msix_names = NULL; | |
123 | kfree(vp_dev->msix_entries); | |
124 | vp_dev->msix_entries = NULL; | |
75a0a52b JW |
125 | kfree(vp_dev->msix_affinity_masks); |
126 | vp_dev->msix_affinity_masks = NULL; | |
82af8ce8 MT |
127 | } |
128 | ||
f68d2408 RR |
129 | static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, |
130 | bool per_vq_vectors) | |
82af8ce8 MT |
131 | { |
132 | struct virtio_pci_device *vp_dev = to_vp_device(vdev); | |
133 | const char *name = dev_name(&vp_dev->vdev.dev); | |
134 | unsigned i, v; | |
135 | int err = -ENOMEM; | |
e969fed5 | 136 | |
f11335db AV |
137 | vp_dev->msix_vectors = nvectors; |
138 | ||
82af8ce8 MT |
139 | vp_dev->msix_entries = kmalloc(nvectors * sizeof *vp_dev->msix_entries, |
140 | GFP_KERNEL); | |
141 | if (!vp_dev->msix_entries) | |
ff52c3fc | 142 | goto error; |
82af8ce8 MT |
143 | vp_dev->msix_names = kmalloc(nvectors * sizeof *vp_dev->msix_names, |
144 | GFP_KERNEL); | |
145 | if (!vp_dev->msix_names) | |
ff52c3fc | 146 | goto error; |
75a0a52b JW |
147 | vp_dev->msix_affinity_masks |
148 | = kzalloc(nvectors * sizeof *vp_dev->msix_affinity_masks, | |
149 | GFP_KERNEL); | |
150 | if (!vp_dev->msix_affinity_masks) | |
151 | goto error; | |
152 | for (i = 0; i < nvectors; ++i) | |
153 | if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i], | |
154 | GFP_KERNEL)) | |
155 | goto error; | |
82af8ce8 MT |
156 | |
157 | for (i = 0; i < nvectors; ++i) | |
158 | vp_dev->msix_entries[i].entry = i; | |
159 | ||
5e37f670 AG |
160 | err = pci_enable_msix_exact(vp_dev->pci_dev, |
161 | vp_dev->msix_entries, nvectors); | |
e969fed5 MT |
162 | if (err) |
163 | goto error; | |
e969fed5 MT |
164 | vp_dev->msix_enabled = 1; |
165 | ||
166 | /* Set the vector used for configuration */ | |
167 | v = vp_dev->msix_used_vectors; | |
168 | snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, | |
169 | "%s-config", name); | |
170 | err = request_irq(vp_dev->msix_entries[v].vector, | |
171 | vp_config_changed, 0, vp_dev->msix_names[v], | |
172 | vp_dev); | |
173 | if (err) | |
174 | goto error; | |
175 | ++vp_dev->msix_used_vectors; | |
82af8ce8 | 176 | |
6f8f23d6 | 177 | v = vp_dev->config_vector(vp_dev, v); |
e969fed5 | 178 | /* Verify we had enough resources to assign the vector */ |
e969fed5 MT |
179 | if (v == VIRTIO_MSI_NO_VECTOR) { |
180 | err = -EBUSY; | |
181 | goto error; | |
82af8ce8 MT |
182 | } |
183 | ||
e969fed5 | 184 | if (!per_vq_vectors) { |
82af8ce8 MT |
185 | /* Shared vector for all VQs */ |
186 | v = vp_dev->msix_used_vectors; | |
187 | snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, | |
188 | "%s-virtqueues", name); | |
189 | err = request_irq(vp_dev->msix_entries[v].vector, | |
190 | vp_vring_interrupt, 0, vp_dev->msix_names[v], | |
191 | vp_dev); | |
192 | if (err) | |
ff52c3fc | 193 | goto error; |
82af8ce8 MT |
194 | ++vp_dev->msix_used_vectors; |
195 | } | |
196 | return 0; | |
ff52c3fc | 197 | error: |
82af8ce8 | 198 | vp_free_vectors(vdev); |
82af8ce8 MT |
199 | return err; |
200 | } | |
201 | ||
f68d2408 RR |
202 | static int vp_request_intx(struct virtio_device *vdev) |
203 | { | |
204 | int err; | |
205 | struct virtio_pci_device *vp_dev = to_vp_device(vdev); | |
206 | ||
207 | err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, | |
208 | IRQF_SHARED, dev_name(&vdev->dev), vp_dev); | |
209 | if (!err) | |
210 | vp_dev->intx_enabled = 1; | |
211 | return err; | |
212 | } | |
213 | ||
b09f00bb MT |
214 | static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned index, |
215 | void (*callback)(struct virtqueue *vq), | |
216 | const char *name, | |
217 | u16 msix_vec) | |
218 | { | |
219 | struct virtio_pci_device *vp_dev = to_vp_device(vdev); | |
220 | struct virtio_pci_vq_info *info = kmalloc(sizeof *info, GFP_KERNEL); | |
221 | struct virtqueue *vq; | |
222 | unsigned long flags; | |
223 | ||
224 | /* fill out our structure that represents an active queue */ | |
225 | if (!info) | |
226 | return ERR_PTR(-ENOMEM); | |
227 | ||
228 | vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, msix_vec); | |
229 | if (IS_ERR(vq)) | |
230 | goto out_info; | |
231 | ||
232 | info->vq = vq; | |
005b20a8 KK |
233 | if (callback) { |
234 | spin_lock_irqsave(&vp_dev->lock, flags); | |
235 | list_add(&info->node, &vp_dev->virtqueues); | |
236 | spin_unlock_irqrestore(&vp_dev->lock, flags); | |
237 | } else { | |
238 | INIT_LIST_HEAD(&info->node); | |
239 | } | |
3343660d | 240 | |
3ec7a77b | 241 | vp_dev->vqs[index] = info; |
3343660d AL |
242 | return vq; |
243 | ||
3343660d AL |
244 | out_info: |
245 | kfree(info); | |
b09f00bb | 246 | return vq; |
3343660d AL |
247 | } |
248 | ||
5386cef2 MT |
249 | static void vp_del_vq(struct virtqueue *vq) |
250 | { | |
251 | struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); | |
252 | struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index]; | |
253 | unsigned long flags; | |
254 | ||
255 | spin_lock_irqsave(&vp_dev->lock, flags); | |
256 | list_del(&info->node); | |
257 | spin_unlock_irqrestore(&vp_dev->lock, flags); | |
258 | ||
259 | vp_dev->del_vq(info); | |
3343660d AL |
260 | kfree(info); |
261 | } | |
262 | ||
82af8ce8 | 263 | /* the config->del_vqs() implementation */ |
38eb4a29 | 264 | void vp_del_vqs(struct virtio_device *vdev) |
d2a7ddda | 265 | { |
e969fed5 | 266 | struct virtio_pci_device *vp_dev = to_vp_device(vdev); |
d2a7ddda | 267 | struct virtqueue *vq, *n; |
e969fed5 | 268 | struct virtio_pci_vq_info *info; |
d2a7ddda | 269 | |
e969fed5 | 270 | list_for_each_entry_safe(vq, n, &vdev->vqs, list) { |
3ec7a77b | 271 | info = vp_dev->vqs[vq->index]; |
31198159 MT |
272 | if (vp_dev->per_vq_vectors && |
273 | info->msix_vector != VIRTIO_MSI_NO_VECTOR) | |
f68d2408 RR |
274 | free_irq(vp_dev->msix_entries[info->msix_vector].vector, |
275 | vq); | |
d2a7ddda | 276 | vp_del_vq(vq); |
e969fed5 MT |
277 | } |
278 | vp_dev->per_vq_vectors = false; | |
82af8ce8 MT |
279 | |
280 | vp_free_vectors(vdev); | |
3ec7a77b | 281 | kfree(vp_dev->vqs); |
d2a7ddda MT |
282 | } |
283 | ||
e969fed5 MT |
284 | static int vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs, |
285 | struct virtqueue *vqs[], | |
286 | vq_callback_t *callbacks[], | |
287 | const char *names[], | |
f68d2408 | 288 | bool use_msix, |
e969fed5 | 289 | bool per_vq_vectors) |
d2a7ddda | 290 | { |
e969fed5 | 291 | struct virtio_pci_device *vp_dev = to_vp_device(vdev); |
f68d2408 RR |
292 | u16 msix_vec; |
293 | int i, err, nvectors, allocated_vectors; | |
82af8ce8 | 294 | |
3ec7a77b MT |
295 | vp_dev->vqs = kmalloc(nvqs * sizeof *vp_dev->vqs, GFP_KERNEL); |
296 | if (!vp_dev->vqs) | |
297 | return -ENOMEM; | |
298 | ||
f68d2408 RR |
299 | if (!use_msix) { |
300 | /* Old style: one normal interrupt for change and all vqs. */ | |
301 | err = vp_request_intx(vdev); | |
302 | if (err) | |
3ec7a77b | 303 | goto error_find; |
f68d2408 RR |
304 | } else { |
305 | if (per_vq_vectors) { | |
306 | /* Best option: one for change interrupt, one per vq. */ | |
307 | nvectors = 1; | |
308 | for (i = 0; i < nvqs; ++i) | |
309 | if (callbacks[i]) | |
310 | ++nvectors; | |
311 | } else { | |
312 | /* Second best: one for change, shared for all vqs. */ | |
313 | nvectors = 2; | |
314 | } | |
315 | ||
316 | err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors); | |
317 | if (err) | |
3ec7a77b | 318 | goto error_find; |
f68d2408 | 319 | } |
d2a7ddda | 320 | |
e969fed5 MT |
321 | vp_dev->per_vq_vectors = per_vq_vectors; |
322 | allocated_vectors = vp_dev->msix_used_vectors; | |
d2a7ddda | 323 | for (i = 0; i < nvqs; ++i) { |
6457f126 MT |
324 | if (!names[i]) { |
325 | vqs[i] = NULL; | |
326 | continue; | |
327 | } else if (!callbacks[i] || !vp_dev->msix_enabled) | |
f68d2408 | 328 | msix_vec = VIRTIO_MSI_NO_VECTOR; |
e969fed5 | 329 | else if (vp_dev->per_vq_vectors) |
f68d2408 | 330 | msix_vec = allocated_vectors++; |
e969fed5 | 331 | else |
f68d2408 | 332 | msix_vec = VP_MSIX_VQ_VECTOR; |
b09f00bb | 333 | vqs[i] = vp_setup_vq(vdev, i, callbacks[i], names[i], msix_vec); |
e969fed5 MT |
334 | if (IS_ERR(vqs[i])) { |
335 | err = PTR_ERR(vqs[i]); | |
82af8ce8 | 336 | goto error_find; |
e969fed5 | 337 | } |
0b22bd0b MT |
338 | |
339 | if (!vp_dev->per_vq_vectors || msix_vec == VIRTIO_MSI_NO_VECTOR) | |
340 | continue; | |
341 | ||
e969fed5 | 342 | /* allocate per-vq irq if available and necessary */ |
0b22bd0b MT |
343 | snprintf(vp_dev->msix_names[msix_vec], |
344 | sizeof *vp_dev->msix_names, | |
345 | "%s-%s", | |
346 | dev_name(&vp_dev->vdev.dev), names[i]); | |
347 | err = request_irq(vp_dev->msix_entries[msix_vec].vector, | |
348 | vring_interrupt, 0, | |
349 | vp_dev->msix_names[msix_vec], | |
350 | vqs[i]); | |
351 | if (err) { | |
352 | vp_del_vq(vqs[i]); | |
353 | goto error_find; | |
e969fed5 | 354 | } |
d2a7ddda MT |
355 | } |
356 | return 0; | |
357 | ||
82af8ce8 | 358 | error_find: |
d2a7ddda | 359 | vp_del_vqs(vdev); |
e969fed5 MT |
360 | return err; |
361 | } | |
362 | ||
363 | /* the config->find_vqs() implementation */ | |
38eb4a29 MT |
364 | int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, |
365 | struct virtqueue *vqs[], | |
366 | vq_callback_t *callbacks[], | |
367 | const char *names[]) | |
e969fed5 | 368 | { |
f68d2408 | 369 | int err; |
e969fed5 | 370 | |
f68d2408 RR |
371 | /* Try MSI-X with one vector per queue. */ |
372 | err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, true, true); | |
e969fed5 MT |
373 | if (!err) |
374 | return 0; | |
f68d2408 | 375 | /* Fallback: MSI-X with one vector for config, one shared for queues. */ |
e969fed5 | 376 | err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, |
f68d2408 | 377 | true, false); |
e969fed5 MT |
378 | if (!err) |
379 | return 0; | |
380 | /* Finally fall back to regular interrupts. */ | |
f68d2408 RR |
381 | return vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, |
382 | false, false); | |
d2a7ddda MT |
383 | } |
384 | ||
38eb4a29 | 385 | const char *vp_bus_name(struct virtio_device *vdev) |
66846048 RJ |
386 | { |
387 | struct virtio_pci_device *vp_dev = to_vp_device(vdev); | |
388 | ||
389 | return pci_name(vp_dev->pci_dev); | |
390 | } | |
391 | ||
75a0a52b JW |
392 | /* Setup the affinity for a virtqueue: |
393 | * - force the affinity for per vq vector | |
394 | * - OR over all affinities for shared MSI | |
395 | * - ignore the affinity request if we're using INTX | |
396 | */ | |
38eb4a29 | 397 | int vp_set_vq_affinity(struct virtqueue *vq, int cpu) |
75a0a52b JW |
398 | { |
399 | struct virtio_device *vdev = vq->vdev; | |
400 | struct virtio_pci_device *vp_dev = to_vp_device(vdev); | |
3ec7a77b | 401 | struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index]; |
75a0a52b JW |
402 | struct cpumask *mask; |
403 | unsigned int irq; | |
404 | ||
405 | if (!vq->callback) | |
406 | return -EINVAL; | |
407 | ||
408 | if (vp_dev->msix_enabled) { | |
409 | mask = vp_dev->msix_affinity_masks[info->msix_vector]; | |
410 | irq = vp_dev->msix_entries[info->msix_vector].vector; | |
411 | if (cpu == -1) | |
412 | irq_set_affinity_hint(irq, NULL); | |
413 | else { | |
414 | cpumask_set_cpu(cpu, mask); | |
415 | irq_set_affinity_hint(irq, mask); | |
416 | } | |
417 | } | |
418 | return 0; | |
419 | } | |
420 | ||
/*
 * Device release callback; intentionally does nothing.
 *
 * All devices are allocated and freed together with the pci devices, so
 * there is nothing to release here.  The empty function is provided only
 * to avoid getting a warning from core.
 */
void virtio_pci_release_dev(struct device *_d)
{
}
429 | ||
9e266ece | 430 | #ifdef CONFIG_PM_SLEEP |
f0fe6f11 AS |
431 | static int virtio_pci_freeze(struct device *dev) |
432 | { | |
433 | struct pci_dev *pci_dev = to_pci_dev(dev); | |
434 | struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); | |
f0fe6f11 AS |
435 | int ret; |
436 | ||
c6716bae | 437 | ret = virtio_device_freeze(&vp_dev->vdev); |
f0fe6f11 AS |
438 | |
439 | if (!ret) | |
440 | pci_disable_device(pci_dev); | |
441 | return ret; | |
442 | } | |
443 | ||
0517fdd1 | 444 | static int virtio_pci_restore(struct device *dev) |
f0fe6f11 AS |
445 | { |
446 | struct pci_dev *pci_dev = to_pci_dev(dev); | |
447 | struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); | |
448 | int ret; | |
449 | ||
450 | ret = pci_enable_device(pci_dev); | |
451 | if (ret) | |
452 | return ret; | |
0517fdd1 | 453 | |
f0fe6f11 | 454 | pci_set_master(pci_dev); |
c6716bae | 455 | return virtio_device_restore(&vp_dev->vdev); |
f0fe6f11 AS |
456 | } |
457 | ||
38eb4a29 | 458 | const struct dev_pm_ops virtio_pci_pm_ops = { |
f878d0be | 459 | SET_SYSTEM_SLEEP_PM_OPS(virtio_pci_freeze, virtio_pci_restore) |
d0775363 | 460 | }; |
3343660d | 461 | #endif |