]>
Commit | Line | Data |
---|---|---|
02eb84d0 MT |
1 | /* |
2 | * MSI-X device support | |
3 | * | |
4 | * This module includes support for MSI-X in pci devices. | |
5 | * | |
6 | * Author: Michael S. Tsirkin <mst@redhat.com> | |
7 | * | |
8 | * Copyright (c) 2009, Red Hat Inc, Michael S. Tsirkin (mst@redhat.com) | |
9 | * | |
10 | * This work is licensed under the terms of the GNU GPL, version 2. See | |
11 | * the COPYING file in the top-level directory. | |
6b620ca3 PB |
12 | * |
13 | * Contributions after 2012-01-13 are licensed under the terms of the | |
14 | * GNU GPL, version 2 or (at your option) any later version. | |
02eb84d0 MT |
15 | */ |
16 | ||
17 | #include "hw.h" | |
60ba3cc2 | 18 | #include "msi.h" |
02eb84d0 MT |
19 | #include "msix.h" |
20 | #include "pci.h" | |
bf1b0071 | 21 | #include "range.h" |
02eb84d0 | 22 | |
/* Size passed to pci_add_capability() for the MSI-X capability. */
#define MSIX_CAP_LENGTH 12

/* The MSI-X enable and mask-all bits sit in byte 1 of the FLAGS register,
 * so they are handled through a byte offset and byte-wide masks. */
#define MSIX_CONTROL_OFFSET (PCI_MSIX_FLAGS + 1)
#define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
#define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8)

/* Space set aside for the MSI-X structures.
 * The spec requires giving the table structure
 * a 4K aligned region all by itself. */
#define MSIX_PAGE_SIZE 0x1000
/* The pending bits are kept in the second half of that page. */
#define MSIX_PAGE_PENDING (MSIX_PAGE_SIZE / 2)
/* Largest vector count msix_init() accepts. */
#define MSIX_MAX_ENTRIES 32
37 | ||
bc4caf49 JK |
38 | static MSIMessage msix_get_message(PCIDevice *dev, unsigned vector) |
39 | { | |
40 | uint8_t *table_entry = dev->msix_table_page + vector * PCI_MSIX_ENTRY_SIZE; | |
41 | MSIMessage msg; | |
42 | ||
43 | msg.address = pci_get_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR); | |
44 | msg.data = pci_get_long(table_entry + PCI_MSIX_ENTRY_DATA); | |
45 | return msg; | |
46 | } | |
02eb84d0 | 47 | |
02eb84d0 MT |
48 | /* Add MSI-X capability to the config space for the device. */ |
49 | /* Given a bar and its size, add MSI-X table on top of it | |
50 | * and fill MSI-X capability in the config space. | |
51 | * Original bar size must be a power of 2 or 0. | |
52 | * New bar size is returned. */ | |
53 | static int msix_add_config(struct PCIDevice *pdev, unsigned short nentries, | |
54 | unsigned bar_nr, unsigned bar_size) | |
55 | { | |
56 | int config_offset; | |
57 | uint8_t *config; | |
58 | uint32_t new_size; | |
59 | ||
60 | if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1) | |
61 | return -EINVAL; | |
62 | if (bar_size > 0x80000000) | |
63 | return -ENOSPC; | |
64 | ||
65 | /* Add space for MSI-X structures */ | |
5e520a7d | 66 | if (!bar_size) { |
5a1fc5e8 MT |
67 | new_size = MSIX_PAGE_SIZE; |
68 | } else if (bar_size < MSIX_PAGE_SIZE) { | |
69 | bar_size = MSIX_PAGE_SIZE; | |
70 | new_size = MSIX_PAGE_SIZE * 2; | |
71 | } else { | |
02eb84d0 | 72 | new_size = bar_size * 2; |
5a1fc5e8 | 73 | } |
02eb84d0 MT |
74 | |
75 | pdev->msix_bar_size = new_size; | |
ca77089d IY |
76 | config_offset = pci_add_capability(pdev, PCI_CAP_ID_MSIX, |
77 | 0, MSIX_CAP_LENGTH); | |
02eb84d0 MT |
78 | if (config_offset < 0) |
79 | return config_offset; | |
80 | config = pdev->config + config_offset; | |
81 | ||
82 | pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1); | |
83 | /* Table on top of BAR */ | |
01731cfb | 84 | pci_set_long(config + PCI_MSIX_TABLE, bar_size | bar_nr); |
02eb84d0 | 85 | /* Pending bits on top of that */ |
01731cfb | 86 | pci_set_long(config + PCI_MSIX_PBA, (bar_size + MSIX_PAGE_PENDING) | |
5a1fc5e8 | 87 | bar_nr); |
02eb84d0 | 88 | pdev->msix_cap = config_offset; |
ebabb67a | 89 | /* Make flags bit writable. */ |
5b5cb086 MT |
90 | pdev->wmask[config_offset + MSIX_CONTROL_OFFSET] |= MSIX_ENABLE_MASK | |
91 | MSIX_MASKALL_MASK; | |
50322249 | 92 | pdev->msix_function_masked = true; |
02eb84d0 MT |
93 | return 0; |
94 | } | |
95 | ||
95524ae8 AK |
96 | static uint64_t msix_mmio_read(void *opaque, target_phys_addr_t addr, |
97 | unsigned size) | |
02eb84d0 MT |
98 | { |
99 | PCIDevice *dev = opaque; | |
76f5159d | 100 | unsigned int offset = addr & (MSIX_PAGE_SIZE - 1) & ~0x3; |
02eb84d0 | 101 | void *page = dev->msix_table_page; |
02eb84d0 | 102 | |
76f5159d | 103 | return pci_get_long(page + offset); |
02eb84d0 MT |
104 | } |
105 | ||
/* Bit mask selecting a vector's bit inside its pending byte. */
static uint8_t msix_pending_mask(int vector)
{
    const int bit = vector % 8;

    return 1 << bit;
}
110 | ||
111 | static uint8_t *msix_pending_byte(PCIDevice *dev, int vector) | |
112 | { | |
5a1fc5e8 | 113 | return dev->msix_table_page + MSIX_PAGE_PENDING + vector / 8; |
02eb84d0 MT |
114 | } |
115 | ||
116 | static int msix_is_pending(PCIDevice *dev, int vector) | |
117 | { | |
118 | return *msix_pending_byte(dev, vector) & msix_pending_mask(vector); | |
119 | } | |
120 | ||
121 | static void msix_set_pending(PCIDevice *dev, int vector) | |
122 | { | |
123 | *msix_pending_byte(dev, vector) |= msix_pending_mask(vector); | |
124 | } | |
125 | ||
126 | static void msix_clr_pending(PCIDevice *dev, int vector) | |
127 | { | |
128 | *msix_pending_byte(dev, vector) &= ~msix_pending_mask(vector); | |
129 | } | |
130 | ||
ae392c41 | 131 | static bool msix_vector_masked(PCIDevice *dev, int vector, bool fmask) |
02eb84d0 | 132 | { |
ae392c41 MT |
133 | unsigned offset = vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL; |
134 | return fmask || dev->msix_table_page[offset] & PCI_MSIX_ENTRY_CTRL_MASKBIT; | |
5b5cb086 MT |
135 | } |
136 | ||
ae392c41 | 137 | static bool msix_is_masked(PCIDevice *dev, int vector) |
5b5cb086 | 138 | { |
ae392c41 MT |
139 | return msix_vector_masked(dev, vector, dev->msix_function_masked); |
140 | } | |
141 | ||
142 | static void msix_handle_mask_update(PCIDevice *dev, int vector, bool was_masked) | |
143 | { | |
144 | bool is_masked = msix_is_masked(dev, vector); | |
145 | if (is_masked == was_masked) { | |
146 | return; | |
147 | } | |
148 | ||
149 | if (!is_masked && msix_is_pending(dev, vector)) { | |
5b5cb086 MT |
150 | msix_clr_pending(dev, vector); |
151 | msix_notify(dev, vector); | |
152 | } | |
153 | } | |
154 | ||
50322249 MT |
155 | static void msix_update_function_masked(PCIDevice *dev) |
156 | { | |
157 | dev->msix_function_masked = !msix_enabled(dev) || | |
158 | (dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] & MSIX_MASKALL_MASK); | |
159 | } | |
160 | ||
5b5cb086 MT |
161 | /* Handle MSI-X capability config write. */ |
162 | void msix_write_config(PCIDevice *dev, uint32_t addr, | |
163 | uint32_t val, int len) | |
164 | { | |
165 | unsigned enable_pos = dev->msix_cap + MSIX_CONTROL_OFFSET; | |
166 | int vector; | |
50322249 | 167 | bool was_masked; |
5b5cb086 | 168 | |
98a3cb02 | 169 | if (!range_covers_byte(addr, len, enable_pos)) { |
5b5cb086 MT |
170 | return; |
171 | } | |
172 | ||
50322249 MT |
173 | was_masked = dev->msix_function_masked; |
174 | msix_update_function_masked(dev); | |
175 | ||
5b5cb086 MT |
176 | if (!msix_enabled(dev)) { |
177 | return; | |
178 | } | |
179 | ||
e407bf13 | 180 | pci_device_deassert_intx(dev); |
5b5cb086 | 181 | |
50322249 | 182 | if (dev->msix_function_masked == was_masked) { |
5b5cb086 MT |
183 | return; |
184 | } | |
185 | ||
186 | for (vector = 0; vector < dev->msix_entries_nr; ++vector) { | |
ae392c41 MT |
187 | msix_handle_mask_update(dev, vector, |
188 | msix_vector_masked(dev, vector, was_masked)); | |
5b5cb086 | 189 | } |
02eb84d0 MT |
190 | } |
191 | ||
95524ae8 AK |
192 | static void msix_mmio_write(void *opaque, target_phys_addr_t addr, |
193 | uint64_t val, unsigned size) | |
02eb84d0 MT |
194 | { |
195 | PCIDevice *dev = opaque; | |
76f5159d | 196 | unsigned int offset = addr & (MSIX_PAGE_SIZE - 1) & ~0x3; |
01731cfb | 197 | int vector = offset / PCI_MSIX_ENTRY_SIZE; |
ae392c41 | 198 | bool was_masked; |
9a93b617 MT |
199 | |
200 | /* MSI-X page includes a read-only PBA and a writeable Vector Control. */ | |
201 | if (vector >= dev->msix_entries_nr) { | |
202 | return; | |
203 | } | |
204 | ||
ae392c41 | 205 | was_masked = msix_is_masked(dev, vector); |
76f5159d | 206 | pci_set_long(dev->msix_table_page + offset, val); |
ae392c41 | 207 | msix_handle_mask_update(dev, vector, was_masked); |
02eb84d0 MT |
208 | } |
209 | ||
95524ae8 AK |
210 | static const MemoryRegionOps msix_mmio_ops = { |
211 | .read = msix_mmio_read, | |
212 | .write = msix_mmio_write, | |
213 | .endianness = DEVICE_NATIVE_ENDIAN, | |
214 | .valid = { | |
215 | .min_access_size = 4, | |
216 | .max_access_size = 4, | |
217 | }, | |
02eb84d0 MT |
218 | }; |
219 | ||
95524ae8 | 220 | static void msix_mmio_setup(PCIDevice *d, MemoryRegion *bar) |
02eb84d0 MT |
221 | { |
222 | uint8_t *config = d->config + d->msix_cap; | |
01731cfb | 223 | uint32_t table = pci_get_long(config + PCI_MSIX_TABLE); |
5a1fc5e8 | 224 | uint32_t offset = table & ~(MSIX_PAGE_SIZE - 1); |
02eb84d0 MT |
225 | /* TODO: for assigned devices, we'll want to make it possible to map |
226 | * pending bits separately in case they are in a separate bar. */ | |
02eb84d0 | 227 | |
95524ae8 | 228 | memory_region_add_subregion(bar, offset, &d->msix_mmio); |
02eb84d0 MT |
229 | } |
230 | ||
ae1be0bb MT |
231 | static void msix_mask_all(struct PCIDevice *dev, unsigned nentries) |
232 | { | |
233 | int vector; | |
5b5f1330 | 234 | |
ae1be0bb | 235 | for (vector = 0; vector < nentries; ++vector) { |
01731cfb JK |
236 | unsigned offset = |
237 | vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL; | |
5b5f1330 JK |
238 | bool was_masked = msix_is_masked(dev, vector); |
239 | ||
01731cfb | 240 | dev->msix_table_page[offset] |= PCI_MSIX_ENTRY_CTRL_MASKBIT; |
5b5f1330 | 241 | msix_handle_mask_update(dev, vector, was_masked); |
ae1be0bb MT |
242 | } |
243 | } | |
244 | ||
02eb84d0 MT |
245 | /* Initialize the MSI-X structures. Note: if MSI-X is supported, BAR size is |
246 | * modified, it should be retrieved with msix_bar_size. */ | |
247 | int msix_init(struct PCIDevice *dev, unsigned short nentries, | |
95524ae8 | 248 | MemoryRegion *bar, |
5a1fc5e8 | 249 | unsigned bar_nr, unsigned bar_size) |
02eb84d0 MT |
250 | { |
251 | int ret; | |
60ba3cc2 | 252 | |
02eb84d0 | 253 | /* Nothing to do if MSI is not supported by interrupt controller */ |
60ba3cc2 | 254 | if (!msi_supported) { |
02eb84d0 | 255 | return -ENOTSUP; |
60ba3cc2 | 256 | } |
02eb84d0 MT |
257 | if (nentries > MSIX_MAX_ENTRIES) |
258 | return -EINVAL; | |
259 | ||
7267c094 | 260 | dev->msix_entry_used = g_malloc0(MSIX_MAX_ENTRIES * |
02eb84d0 MT |
261 | sizeof *dev->msix_entry_used); |
262 | ||
7267c094 | 263 | dev->msix_table_page = g_malloc0(MSIX_PAGE_SIZE); |
ae1be0bb | 264 | msix_mask_all(dev, nentries); |
02eb84d0 | 265 | |
95524ae8 AK |
266 | memory_region_init_io(&dev->msix_mmio, &msix_mmio_ops, dev, |
267 | "msix", MSIX_PAGE_SIZE); | |
02eb84d0 MT |
268 | |
269 | dev->msix_entries_nr = nentries; | |
270 | ret = msix_add_config(dev, nentries, bar_nr, bar_size); | |
271 | if (ret) | |
272 | goto err_config; | |
273 | ||
274 | dev->cap_present |= QEMU_PCI_CAP_MSIX; | |
95524ae8 | 275 | msix_mmio_setup(dev, bar); |
02eb84d0 MT |
276 | return 0; |
277 | ||
278 | err_config: | |
3174ecd1 | 279 | dev->msix_entries_nr = 0; |
95524ae8 | 280 | memory_region_destroy(&dev->msix_mmio); |
7267c094 | 281 | g_free(dev->msix_table_page); |
02eb84d0 | 282 | dev->msix_table_page = NULL; |
7267c094 | 283 | g_free(dev->msix_entry_used); |
02eb84d0 MT |
284 | dev->msix_entry_used = NULL; |
285 | return ret; | |
286 | } | |
287 | ||
98304c84 MT |
288 | static void msix_free_irq_entries(PCIDevice *dev) |
289 | { | |
290 | int vector; | |
291 | ||
292 | for (vector = 0; vector < dev->msix_entries_nr; ++vector) { | |
293 | dev->msix_entry_used[vector] = 0; | |
294 | msix_clr_pending(dev, vector); | |
295 | } | |
296 | } | |
297 | ||
02eb84d0 | 298 | /* Clean up resources for the device. */ |
95524ae8 | 299 | int msix_uninit(PCIDevice *dev, MemoryRegion *bar) |
02eb84d0 MT |
300 | { |
301 | if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) | |
302 | return 0; | |
303 | pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH); | |
304 | dev->msix_cap = 0; | |
305 | msix_free_irq_entries(dev); | |
306 | dev->msix_entries_nr = 0; | |
95524ae8 AK |
307 | memory_region_del_subregion(bar, &dev->msix_mmio); |
308 | memory_region_destroy(&dev->msix_mmio); | |
7267c094 | 309 | g_free(dev->msix_table_page); |
02eb84d0 | 310 | dev->msix_table_page = NULL; |
7267c094 | 311 | g_free(dev->msix_entry_used); |
02eb84d0 MT |
312 | dev->msix_entry_used = NULL; |
313 | dev->cap_present &= ~QEMU_PCI_CAP_MSIX; | |
314 | return 0; | |
315 | } | |
316 | ||
317 | void msix_save(PCIDevice *dev, QEMUFile *f) | |
318 | { | |
9a3e12c8 MT |
319 | unsigned n = dev->msix_entries_nr; |
320 | ||
72755a70 | 321 | if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) { |
9a3e12c8 | 322 | return; |
72755a70 | 323 | } |
9a3e12c8 | 324 | |
01731cfb | 325 | qemu_put_buffer(f, dev->msix_table_page, n * PCI_MSIX_ENTRY_SIZE); |
5a1fc5e8 | 326 | qemu_put_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING, (n + 7) / 8); |
02eb84d0 MT |
327 | } |
328 | ||
329 | /* Should be called after restoring the config space. */ | |
330 | void msix_load(PCIDevice *dev, QEMUFile *f) | |
331 | { | |
332 | unsigned n = dev->msix_entries_nr; | |
333 | ||
98846d73 | 334 | if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) { |
02eb84d0 | 335 | return; |
98846d73 | 336 | } |
02eb84d0 | 337 | |
4bfd1712 | 338 | msix_free_irq_entries(dev); |
01731cfb | 339 | qemu_get_buffer(f, dev->msix_table_page, n * PCI_MSIX_ENTRY_SIZE); |
5a1fc5e8 | 340 | qemu_get_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING, (n + 7) / 8); |
50322249 | 341 | msix_update_function_masked(dev); |
02eb84d0 MT |
342 | } |
343 | ||
344 | /* Does device support MSI-X? */ | |
345 | int msix_present(PCIDevice *dev) | |
346 | { | |
347 | return dev->cap_present & QEMU_PCI_CAP_MSIX; | |
348 | } | |
349 | ||
350 | /* Is MSI-X enabled? */ | |
351 | int msix_enabled(PCIDevice *dev) | |
352 | { | |
353 | return (dev->cap_present & QEMU_PCI_CAP_MSIX) && | |
2760952b | 354 | (dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] & |
02eb84d0 MT |
355 | MSIX_ENABLE_MASK); |
356 | } | |
357 | ||
358 | /* Size of bar where MSI-X table resides, or 0 if MSI-X not supported. */ | |
359 | uint32_t msix_bar_size(PCIDevice *dev) | |
360 | { | |
361 | return (dev->cap_present & QEMU_PCI_CAP_MSIX) ? | |
362 | dev->msix_bar_size : 0; | |
363 | } | |
364 | ||
365 | /* Send an MSI-X message */ | |
366 | void msix_notify(PCIDevice *dev, unsigned vector) | |
367 | { | |
bc4caf49 | 368 | MSIMessage msg; |
02eb84d0 MT |
369 | |
370 | if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) | |
371 | return; | |
372 | if (msix_is_masked(dev, vector)) { | |
373 | msix_set_pending(dev, vector); | |
374 | return; | |
375 | } | |
376 | ||
bc4caf49 JK |
377 | msg = msix_get_message(dev, vector); |
378 | ||
379 | stl_le_phys(msg.address, msg.data); | |
02eb84d0 MT |
380 | } |
381 | ||
382 | void msix_reset(PCIDevice *dev) | |
383 | { | |
384 | if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) | |
385 | return; | |
386 | msix_free_irq_entries(dev); | |
2760952b MT |
387 | dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &= |
388 | ~dev->wmask[dev->msix_cap + MSIX_CONTROL_OFFSET]; | |
5a1fc5e8 | 389 | memset(dev->msix_table_page, 0, MSIX_PAGE_SIZE); |
ae1be0bb | 390 | msix_mask_all(dev, dev->msix_entries_nr); |
02eb84d0 MT |
391 | } |
392 | ||
393 | /* PCI spec suggests that devices make it possible for software to configure | |
394 | * fewer vectors than supported by the device, but does not specify a standard | |
395 | * mechanism for devices to do so. | |
396 | * | |
397 | * We support this by asking devices to declare vectors software is going to | |
398 | * actually use, and checking this on the notification path. Devices that | |
399 | * don't want to follow the spec suggestion can declare all vectors as used. */ | |
400 | ||
401 | /* Mark vector as used. */ | |
402 | int msix_vector_use(PCIDevice *dev, unsigned vector) | |
403 | { | |
404 | if (vector >= dev->msix_entries_nr) | |
405 | return -EINVAL; | |
406 | dev->msix_entry_used[vector]++; | |
407 | return 0; | |
408 | } | |
409 | ||
410 | /* Mark vector as unused. */ | |
411 | void msix_vector_unuse(PCIDevice *dev, unsigned vector) | |
412 | { | |
98304c84 MT |
413 | if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) { |
414 | return; | |
415 | } | |
416 | if (--dev->msix_entry_used[vector]) { | |
417 | return; | |
418 | } | |
419 | msix_clr_pending(dev, vector); | |
02eb84d0 | 420 | } |
b5f28bca MT |
421 | |
422 | void msix_unuse_all_vectors(PCIDevice *dev) | |
423 | { | |
424 | if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) | |
425 | return; | |
426 | msix_free_irq_entries(dev); | |
427 | } |