]>
Commit | Line | Data |
---|---|---|
b2441318 | 1 | // SPDX-License-Identifier: GPL-2.0 |
30edc14b KRW |
2 | /* |
3 | * PCI Backend Operations - respond to PCI requests from Frontend | |
4 | * | |
5 | * Author: Ryan Wilson <hap9@epoch.ncsc.mil> | |
6 | */ | |
283c0972 JP |
7 | |
8 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | |
69049454 | 9 | #define dev_fmt pr_fmt |
283c0972 | 10 | |
59aa56bf | 11 | #include <linux/moduleparam.h> |
30edc14b KRW |
12 | #include <linux/wait.h> |
13 | #include <linux/bitops.h> | |
14 | #include <xen/events.h> | |
15 | #include <linux/sched.h> | |
16 | #include "pciback.h" | |
17 | ||
a92336a1 KRW |
18 | static irqreturn_t xen_pcibk_guest_interrupt(int irq, void *dev_id); |
19 | ||
0513fe9e | 20 | /* Ensure a device has the fake IRQ handler "turned on/off" and is
a92336a1 | 21 | * ready to be exported. This MUST be run after xen_pcibk_reset_device |
0513fe9e KRW |
22 | * which does the actual PCI device enable/disable. |
23 | */ | |
a92336a1 | 24 | static void xen_pcibk_control_isr(struct pci_dev *dev, int reset) |
0513fe9e | 25 | { |
a92336a1 | 26 | struct xen_pcibk_dev_data *dev_data; |
0513fe9e KRW |
27 | int rc; |
28 | int enable = 0; | |
29 | ||
30 | dev_data = pci_get_drvdata(dev); | |
31 | if (!dev_data) | |
32 | return; | |
33 | ||
34 | /* We don't deal with bridges */ | |
35 | if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) | |
36 | return; | |
37 | ||
38 | if (reset) { | |
39 | dev_data->enable_intx = 0; | |
40 | dev_data->ack_intr = 0; | |
41 | } | |
42 | enable = dev_data->enable_intx; | |
43 | ||
44 | /* Asked to disable, but ISR isn't running */ | |
45 | if (!enable && !dev_data->isr_on) | |
46 | return; | |
47 | ||
48 | /* Squirrel away the IRQs in the dev_data. We need this | |
49 | * b/c when device transitions to MSI, the dev->irq is | |
50 | * overwritten with the MSI vector. | |
51 | */ | |
52 | if (enable) | |
53 | dev_data->irq = dev->irq; | |
54 | ||
e17ab35f KRW |
55 | /* |
56 | * SR-IOV devices all use MSI-X and have no legacy | |
57 | * interrupts, so inhibit creating a fake IRQ handler for them. | |
58 | */ | |
59 | if (dev_data->irq == 0) | |
60 | goto out; | |
61 | ||
0513fe9e KRW |
62 | dev_dbg(&dev->dev, "%s: #%d %s %s%s %s-> %s\n", |
63 | dev_data->irq_name, | |
64 | dev_data->irq, | |
65 | pci_is_enabled(dev) ? "on" : "off", | |
66 | dev->msi_enabled ? "MSI" : "", | |
67 | dev->msix_enabled ? "MSI/X" : "", | |
68 | dev_data->isr_on ? "enable" : "disable", | |
69 | enable ? "enable" : "disable"); | |
70 | ||
71 | if (enable) { | |
a396f3a2 KRW |
72 | /* |
73 | * The MSI or MSI-X should not have an IRQ handler. Otherwise | |
74 | * if the guest terminates we BUG_ON in free_msi_irqs. | |
75 | */ | |
76 | if (dev->msi_enabled || dev->msix_enabled) | |
77 | goto out; | |
78 | ||
0513fe9e | 79 | rc = request_irq(dev_data->irq, |
a92336a1 | 80 | xen_pcibk_guest_interrupt, IRQF_SHARED, |
0513fe9e KRW |
81 | dev_data->irq_name, dev); |
82 | if (rc) { | |
83 | dev_err(&dev->dev, "%s: failed to install fake IRQ " \ | |
84 | "handler for IRQ %d! (rc:%d)\n", | |
85 | dev_data->irq_name, dev_data->irq, rc); | |
86 | goto out; | |
87 | } | |
88 | } else { | |
89 | free_irq(dev_data->irq, dev); | |
90 | dev_data->irq = 0; | |
91 | } | |
92 | dev_data->isr_on = enable; | |
93 | dev_data->ack_intr = enable; | |
94 | out: | |
95 | dev_dbg(&dev->dev, "%s: #%d %s %s%s %s\n", | |
96 | dev_data->irq_name, | |
97 | dev_data->irq, | |
98 | pci_is_enabled(dev) ? "on" : "off", | |
99 | dev->msi_enabled ? "MSI" : "", | |
100 | dev->msix_enabled ? "MSI/X" : "", | |
101 | enable ? (dev_data->isr_on ? "enabled" : "failed to enable") : | |
102 | (dev_data->isr_on ? "failed to disable" : "disabled")); | |
103 | } | |
104 | ||
30edc14b | 105 | /* Ensure a device is "turned off" and ready to be exported. |
a92336a1 | 106 | * (Also see xen_pcibk_config_reset to ensure virtual configuration space is |
30edc14b KRW |
107 | * ready to be re-exported) |
108 | */ | |
a92336a1 | 109 | void xen_pcibk_reset_device(struct pci_dev *dev) |
30edc14b KRW |
110 | { |
111 | u16 cmd; | |
112 | ||
a92336a1 | 113 | xen_pcibk_control_isr(dev, 1 /* reset device */); |
0513fe9e | 114 | |
30edc14b KRW |
115 | /* Disable devices (but not bridges) */ |
116 | if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) { | |
a2be65fd KRW |
117 | #ifdef CONFIG_PCI_MSI |
118 | /* The guest could have been abruptly killed without | |
119 | * disabling MSI/MSI-X interrupts.*/ | |
120 | if (dev->msix_enabled) | |
121 | pci_disable_msix(dev); | |
122 | if (dev->msi_enabled) | |
123 | pci_disable_msi(dev); | |
124 | #endif | |
bdc5c181 KRW |
125 | if (pci_is_enabled(dev)) |
126 | pci_disable_device(dev); | |
30edc14b | 127 | |
30edc14b KRW |
128 | dev->is_busmaster = 0; |
129 | } else { | |
130 | pci_read_config_word(dev, PCI_COMMAND, &cmd); | |
131 | if (cmd & (PCI_COMMAND_INVALIDATE)) { | |
132 | cmd &= ~(PCI_COMMAND_INVALIDATE); | |
133 | pci_write_config_word(dev, PCI_COMMAND, cmd); | |
134 | ||
135 | dev->is_busmaster = 0; | |
136 | } | |
137 | } | |
138 | } | |
a92336a1 KRW |
139 | |
140 | #ifdef CONFIG_PCI_MSI | |
141 | static | |
142 | int xen_pcibk_enable_msi(struct xen_pcibk_device *pdev, | |
143 | struct pci_dev *dev, struct xen_pci_op *op) | |
144 | { | |
145 | struct xen_pcibk_dev_data *dev_data; | |
a92336a1 KRW |
146 | int status; |
147 | ||
56441f3c KRW |
148 | if (dev->msi_enabled) |
149 | status = -EALREADY; | |
150 | else if (dev->msix_enabled) | |
151 | status = -ENXIO; | |
152 | else | |
153 | status = pci_enable_msi(dev); | |
a92336a1 KRW |
154 | |
155 | if (status) { | |
69049454 BH |
156 | dev_warn_ratelimited(&dev->dev, "error enabling MSI for guest %u: err %d\n", |
157 | pdev->xdev->otherend_id, status); | |
a92336a1 KRW |
158 | op->value = 0; |
159 | return XEN_PCI_ERR_op_failed; | |
160 | } | |
161 | ||
162 | /* The value the guest needs is actually the IDT vector, not the | |
163 | * local domain's IRQ number. */ | |
164 | ||
165 | op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0; | |
4969a3a2 BO |
166 | |
167 | dev_dbg(&dev->dev, "MSI: %d\n", op->value); | |
a92336a1 KRW |
168 | |
169 | dev_data = pci_get_drvdata(dev); | |
170 | if (dev_data) | |
171 | dev_data->ack_intr = 0; | |
172 | ||
173 | return 0; | |
174 | } | |
175 | ||
176 | static | |
177 | int xen_pcibk_disable_msi(struct xen_pcibk_device *pdev, | |
178 | struct pci_dev *dev, struct xen_pci_op *op) | |
179 | { | |
7cfb905b KRW |
180 | if (dev->msi_enabled) { |
181 | struct xen_pcibk_dev_data *dev_data; | |
182 | ||
183 | pci_disable_msi(dev); | |
184 | ||
185 | dev_data = pci_get_drvdata(dev); | |
186 | if (dev_data) | |
187 | dev_data->ack_intr = 1; | |
188 | } | |
a92336a1 | 189 | op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0; |
4969a3a2 BO |
190 | |
191 | dev_dbg(&dev->dev, "MSI: %d\n", op->value); | |
192 | ||
a92336a1 KRW |
193 | return 0; |
194 | } | |
195 | ||
196 | static | |
197 | int xen_pcibk_enable_msix(struct xen_pcibk_device *pdev, | |
198 | struct pci_dev *dev, struct xen_pci_op *op) | |
199 | { | |
200 | struct xen_pcibk_dev_data *dev_data; | |
201 | int i, result; | |
202 | struct msix_entry *entries; | |
408fb0e5 | 203 | u16 cmd; |
a92336a1 | 204 | |
4969a3a2 | 205 | dev_dbg(&dev->dev, "enable MSI-X\n"); |
5e0ce145 | 206 | |
a92336a1 KRW |
207 | if (op->value > SH_INFO_MAX_VEC) |
208 | return -EINVAL; | |
209 | ||
5e0ce145 KRW |
210 | if (dev->msix_enabled) |
211 | return -EALREADY; | |
212 | ||
408fb0e5 KRW |
213 | /* |
214 | * PCI_COMMAND_MEMORY must be enabled, otherwise we may not be able | |
215 | * to access the BARs where the MSI-X entries reside. | |
8d47065f | 216 | * But VF devices are unique in that the PF needs to be checked. |
408fb0e5 | 217 | */ |
8d47065f | 218 | pci_read_config_word(pci_physfn(dev), PCI_COMMAND, &cmd); |
408fb0e5 | 219 | if (dev->msi_enabled || !(cmd & PCI_COMMAND_MEMORY)) |
5e0ce145 KRW |
220 | return -ENXIO; |
221 | ||
6da2ec56 | 222 | entries = kmalloc_array(op->value, sizeof(*entries), GFP_KERNEL); |
a92336a1 KRW |
223 | if (entries == NULL) |
224 | return -ENOMEM; | |
225 | ||
226 | for (i = 0; i < op->value; i++) { | |
227 | entries[i].entry = op->msix_entries[i].entry; | |
228 | entries[i].vector = op->msix_entries[i].vector; | |
229 | } | |
230 | ||
efdfa3ed | 231 | result = pci_enable_msix_exact(dev, entries, op->value); |
a92336a1 KRW |
232 | if (result == 0) { |
233 | for (i = 0; i < op->value; i++) { | |
234 | op->msix_entries[i].entry = entries[i].entry; | |
c0914e61 | 235 | if (entries[i].vector) { |
a92336a1 KRW |
236 | op->msix_entries[i].vector = |
237 | xen_pirq_from_irq(entries[i].vector); | |
4969a3a2 BO |
238 | dev_dbg(&dev->dev, "MSI-X[%d]: %d\n", i, |
239 | op->msix_entries[i].vector); | |
c0914e61 | 240 | } |
a92336a1 | 241 | } |
51ac8893 | 242 | } else |
69049454 BH |
243 | dev_warn_ratelimited(&dev->dev, "error enabling MSI-X for guest %u: err %d!\n", |
244 | pdev->xdev->otherend_id, result); | |
a92336a1 KRW |
245 | kfree(entries); |
246 | ||
247 | op->value = result; | |
248 | dev_data = pci_get_drvdata(dev); | |
249 | if (dev_data) | |
250 | dev_data->ack_intr = 0; | |
251 | ||
0ee46eca | 252 | return result > 0 ? 0 : result; |
a92336a1 KRW |
253 | } |
254 | ||
255 | static | |
256 | int xen_pcibk_disable_msix(struct xen_pcibk_device *pdev, | |
257 | struct pci_dev *dev, struct xen_pci_op *op) | |
258 | { | |
7cfb905b KRW |
259 | if (dev->msix_enabled) { |
260 | struct xen_pcibk_dev_data *dev_data; | |
261 | ||
262 | pci_disable_msix(dev); | |
263 | ||
264 | dev_data = pci_get_drvdata(dev); | |
265 | if (dev_data) | |
266 | dev_data->ack_intr = 1; | |
267 | } | |
a92336a1 KRW |
268 | /* |
269 | * SR-IOV devices (which don't have any legacy IRQ) have | |
270 | * an undefined IRQ value of zero. | |
271 | */ | |
272 | op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0; | |
4969a3a2 BO |
273 | |
274 | dev_dbg(&dev->dev, "MSI-X: %d\n", op->value); | |
275 | ||
a92336a1 KRW |
276 | return 0; |
277 | } | |
278 | #endif | |
c2711441 JG |
279 | |
280 | static inline bool xen_pcibk_test_op_pending(struct xen_pcibk_device *pdev) | |
281 | { | |
282 | return test_bit(_XEN_PCIF_active, | |
283 | (unsigned long *)&pdev->sh_info->flags) && | |
284 | !test_and_set_bit(_PDEVF_op_active, &pdev->flags); | |
285 | } | |
286 | ||
30edc14b KRW |
287 | /* |
288 | * Now the same evtchn is used for both pcifront conf_read_write request | |
289 | * as well as pcie aer front end ack. We use a new work_queue to schedule | |
a92336a1 | 290 | * xen_pcibk conf_read_write service for avoiding conflict with aer_core
30edc14b KRW |
291 | * do_recovery job which also uses the system default work_queue |
292 | */ | |
c2711441 | 293 | static void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev) |
30edc14b | 294 | { |
c2711441 JG |
295 | bool eoi = true; |
296 | ||
30edc14b KRW |
297 | /* Check that frontend is requesting an operation and that we are not |
298 | * already processing a request */ | |
c2711441 | 299 | if (xen_pcibk_test_op_pending(pdev)) { |
429eafe6 | 300 | schedule_work(&pdev->op_work); |
c2711441 | 301 | eoi = false; |
30edc14b KRW |
302 | } |
303 | /*_XEN_PCIB_active should have been cleared by pcifront. And also make | |
a92336a1 | 304 | sure xen_pcibk is waiting for ack by checking _PCIB_op_pending*/ |
30edc14b KRW |
305 | if (!test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags) |
306 | && test_bit(_PCIB_op_pending, &pdev->flags)) { | |
a92336a1 | 307 | wake_up(&xen_pcibk_aer_wait_queue); |
c2711441 | 308 | eoi = false; |
30edc14b | 309 | } |
c2711441 JG |
310 | |
311 | /* EOI if there was nothing to do. */ | |
312 | if (eoi) | |
313 | xen_pcibk_lateeoi(pdev, XEN_EOI_FLAG_SPURIOUS); | |
30edc14b KRW |
314 | } |
315 | ||
316 | /* Performing the configuration space reads/writes must not be done in atomic | |
317 | * context because some of the pci_* functions can sleep (mostly due to ACPI | |
318 | * use of semaphores). This function is intended to be called from a work | |
a92336a1 | 319 | * queue in process context taking a struct xen_pcibk_device as a parameter */ |
30edc14b | 320 | |
c2711441 | 321 | static void xen_pcibk_do_one_op(struct xen_pcibk_device *pdev) |
30edc14b | 322 | { |
30edc14b | 323 | struct pci_dev *dev; |
a92336a1 | 324 | struct xen_pcibk_dev_data *dev_data = NULL; |
8135cf8b | 325 | struct xen_pci_op *op = &pdev->op; |
0513fe9e | 326 | int test_intx = 0; |
d159457b KRW |
327 | #ifdef CONFIG_PCI_MSI |
328 | unsigned int nr = 0; | |
329 | #endif | |
30edc14b | 330 | |
8135cf8b KRW |
331 | *op = pdev->sh_info->op; |
332 | barrier(); | |
a92336a1 | 333 | dev = xen_pcibk_get_pci_dev(pdev, op->domain, op->bus, op->devfn); |
30edc14b KRW |
334 | |
335 | if (dev == NULL) | |
336 | op->err = XEN_PCI_ERR_dev_not_found; | |
337 | else { | |
0513fe9e KRW |
338 | dev_data = pci_get_drvdata(dev); |
339 | if (dev_data) | |
340 | test_intx = dev_data->enable_intx; | |
30edc14b KRW |
341 | switch (op->cmd) { |
342 | case XEN_PCI_OP_conf_read: | |
a92336a1 | 343 | op->err = xen_pcibk_config_read(dev, |
30edc14b KRW |
344 | op->offset, op->size, &op->value); |
345 | break; | |
346 | case XEN_PCI_OP_conf_write: | |
a92336a1 | 347 | op->err = xen_pcibk_config_write(dev, |
30edc14b KRW |
348 | op->offset, op->size, op->value); |
349 | break; | |
350 | #ifdef CONFIG_PCI_MSI | |
351 | case XEN_PCI_OP_enable_msi: | |
a92336a1 | 352 | op->err = xen_pcibk_enable_msi(pdev, dev, op); |
30edc14b KRW |
353 | break; |
354 | case XEN_PCI_OP_disable_msi: | |
a92336a1 | 355 | op->err = xen_pcibk_disable_msi(pdev, dev, op); |
30edc14b KRW |
356 | break; |
357 | case XEN_PCI_OP_enable_msix: | |
d159457b | 358 | nr = op->value; |
a92336a1 | 359 | op->err = xen_pcibk_enable_msix(pdev, dev, op); |
30edc14b KRW |
360 | break; |
361 | case XEN_PCI_OP_disable_msix: | |
a92336a1 | 362 | op->err = xen_pcibk_disable_msix(pdev, dev, op); |
30edc14b KRW |
363 | break; |
364 | #endif | |
365 | default: | |
366 | op->err = XEN_PCI_ERR_not_implemented; | |
367 | break; | |
368 | } | |
369 | } | |
0513fe9e KRW |
370 | if (!op->err && dev && dev_data) { |
371 | /* Transition detected */ | |
372 | if ((dev_data->enable_intx != test_intx)) | |
a92336a1 | 373 | xen_pcibk_control_isr(dev, 0 /* no reset */); |
0513fe9e | 374 | } |
8135cf8b KRW |
375 | pdev->sh_info->op.err = op->err; |
376 | pdev->sh_info->op.value = op->value; | |
377 | #ifdef CONFIG_PCI_MSI | |
378 | if (op->cmd == XEN_PCI_OP_enable_msix && op->err == 0) { | |
379 | unsigned int i; | |
380 | ||
d159457b | 381 | for (i = 0; i < nr; i++) |
8135cf8b KRW |
382 | pdev->sh_info->op.msix_entries[i].vector = |
383 | op->msix_entries[i].vector; | |
384 | } | |
385 | #endif | |
30edc14b KRW |
386 | /* Tell the driver domain that we're done. */ |
387 | wmb(); | |
388 | clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags); | |
389 | notify_remote_via_irq(pdev->evtchn_irq); | |
390 | ||
391 | /* Mark that we're done. */ | |
4e857c58 | 392 | smp_mb__before_atomic(); /* /after/ clearing PCIF_active */ |
30edc14b | 393 | clear_bit(_PDEVF_op_active, &pdev->flags); |
4e857c58 | 394 | smp_mb__after_atomic(); /* /before/ final check for work */ |
c2711441 | 395 | } |
30edc14b | 396 | |
c2711441 JG |
397 | void xen_pcibk_do_op(struct work_struct *data) |
398 | { | |
399 | struct xen_pcibk_device *pdev = | |
400 | container_of(data, struct xen_pcibk_device, op_work); | |
401 | ||
402 | do { | |
403 | xen_pcibk_do_one_op(pdev); | |
404 | } while (xen_pcibk_test_op_pending(pdev)); | |
405 | ||
406 | xen_pcibk_lateeoi(pdev, 0); | |
30edc14b KRW |
407 | } |
408 | ||
a92336a1 | 409 | irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id) |
30edc14b | 410 | { |
a92336a1 | 411 | struct xen_pcibk_device *pdev = dev_id; |
c2711441 JG |
412 | bool eoi; |
413 | ||
414 | /* IRQs might come in before pdev->evtchn_irq is written. */ | |
415 | if (unlikely(pdev->evtchn_irq != irq)) | |
416 | pdev->evtchn_irq = irq; | |
417 | ||
418 | eoi = test_and_set_bit(_EOI_pending, &pdev->flags); | |
419 | WARN(eoi, "IRQ while EOI pending\n"); | |
30edc14b | 420 | |
a92336a1 | 421 | xen_pcibk_test_and_schedule_op(pdev); |
30edc14b KRW |
422 | |
423 | return IRQ_HANDLED; | |
424 | } | |
a92336a1 | 425 | static irqreturn_t xen_pcibk_guest_interrupt(int irq, void *dev_id) |
0513fe9e KRW |
426 | { |
427 | struct pci_dev *dev = (struct pci_dev *)dev_id; | |
a92336a1 | 428 | struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev); |
0513fe9e KRW |
429 | |
430 | if (dev_data->isr_on && dev_data->ack_intr) { | |
431 | dev_data->handled++; | |
432 | if ((dev_data->handled % 1000) == 0) { | |
433 | if (xen_test_irq_shared(irq)) { | |
69049454 | 434 | dev_info(&dev->dev, "%s IRQ line is not shared " |
0513fe9e KRW |
435 | "with other domains. Turning ISR off\n", |
436 | dev_data->irq_name); | |
437 | dev_data->ack_intr = 0; | |
438 | } | |
439 | } | |
440 | return IRQ_HANDLED; | |
441 | } | |
442 | return IRQ_NONE; | |
443 | } |