]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * MSI-X device support | |
3 | * | |
4 | * This module includes support for MSI-X in pci devices. | |
5 | * | |
6 | * Author: Michael S. Tsirkin <mst@redhat.com> | |
7 | * | |
8 | * Copyright (c) 2009, Red Hat Inc, Michael S. Tsirkin (mst@redhat.com) | |
9 | * | |
10 | * This work is licensed under the terms of the GNU GPL, version 2. See | |
11 | * the COPYING file in the top-level directory. | |
12 | */ | |
13 | ||
14 | #include "hw.h" | |
15 | #include "msix.h" | |
16 | #include "pci.h" | |
17 | #include "range.h" | |
18 | ||
/* Size handed to pci_add_capability()/pci_del_capability() for the MSI-X
 * capability structure. */
#define MSIX_CAP_LENGTH 12

/* MSI-X enable bit and maskall bit are in byte 1 in FLAGS register.
 * The two masks below are shifted down by 8 so that they can be applied
 * directly to that single config byte. */
#define MSIX_CONTROL_OFFSET (PCI_MSIX_FLAGS + 1)
#define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
#define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8)

/* How much space does an MSIX table need. */
/* The spec requires giving the table structure
 * a 4K aligned region all by itself. */
#define MSIX_PAGE_SIZE 0x1000
/* Reserve second half of the page for pending bits */
#define MSIX_PAGE_PENDING (MSIX_PAGE_SIZE / 2)
/* Largest vector count msix_init() accepts; also the number of elements
 * allocated for msix_entry_used. */
#define MSIX_MAX_ENTRIES 32


/* Flag for interrupt controller to declare MSI-X support.
 * msix_init() fails with -ENOTSUP while this is zero. */
int msix_supported;
37 | ||
38 | /* Add MSI-X capability to the config space for the device. */ | |
39 | /* Given a bar and its size, add MSI-X table on top of it | |
40 | * and fill MSI-X capability in the config space. | |
41 | * Original bar size must be a power of 2 or 0. | |
42 | * New bar size is returned. */ | |
43 | static int msix_add_config(struct PCIDevice *pdev, unsigned short nentries, | |
44 | unsigned bar_nr, unsigned bar_size) | |
45 | { | |
46 | int config_offset; | |
47 | uint8_t *config; | |
48 | uint32_t new_size; | |
49 | ||
50 | if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1) | |
51 | return -EINVAL; | |
52 | if (bar_size > 0x80000000) | |
53 | return -ENOSPC; | |
54 | ||
55 | /* Add space for MSI-X structures */ | |
56 | if (!bar_size) { | |
57 | new_size = MSIX_PAGE_SIZE; | |
58 | } else if (bar_size < MSIX_PAGE_SIZE) { | |
59 | bar_size = MSIX_PAGE_SIZE; | |
60 | new_size = MSIX_PAGE_SIZE * 2; | |
61 | } else { | |
62 | new_size = bar_size * 2; | |
63 | } | |
64 | ||
65 | pdev->msix_bar_size = new_size; | |
66 | config_offset = pci_add_capability(pdev, PCI_CAP_ID_MSIX, | |
67 | 0, MSIX_CAP_LENGTH); | |
68 | if (config_offset < 0) | |
69 | return config_offset; | |
70 | config = pdev->config + config_offset; | |
71 | ||
72 | pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1); | |
73 | /* Table on top of BAR */ | |
74 | pci_set_long(config + PCI_MSIX_TABLE, bar_size | bar_nr); | |
75 | /* Pending bits on top of that */ | |
76 | pci_set_long(config + PCI_MSIX_PBA, (bar_size + MSIX_PAGE_PENDING) | | |
77 | bar_nr); | |
78 | pdev->msix_cap = config_offset; | |
79 | /* Make flags bit writable. */ | |
80 | pdev->wmask[config_offset + MSIX_CONTROL_OFFSET] |= MSIX_ENABLE_MASK | | |
81 | MSIX_MASKALL_MASK; | |
82 | pdev->msix_function_masked = true; | |
83 | return 0; | |
84 | } | |
85 | ||
86 | static uint64_t msix_mmio_read(void *opaque, target_phys_addr_t addr, | |
87 | unsigned size) | |
88 | { | |
89 | PCIDevice *dev = opaque; | |
90 | unsigned int offset = addr & (MSIX_PAGE_SIZE - 1) & ~0x3; | |
91 | void *page = dev->msix_table_page; | |
92 | ||
93 | return pci_get_long(page + offset); | |
94 | } | |
95 | ||
/* Bit mask selecting @vector's bit inside its pending byte. */
static uint8_t msix_pending_mask(int vector)
{
    int bit_in_byte = vector % 8;

    return 1 << bit_in_byte;
}
100 | ||
101 | static uint8_t *msix_pending_byte(PCIDevice *dev, int vector) | |
102 | { | |
103 | return dev->msix_table_page + MSIX_PAGE_PENDING + vector / 8; | |
104 | } | |
105 | ||
106 | static int msix_is_pending(PCIDevice *dev, int vector) | |
107 | { | |
108 | return *msix_pending_byte(dev, vector) & msix_pending_mask(vector); | |
109 | } | |
110 | ||
111 | static void msix_set_pending(PCIDevice *dev, int vector) | |
112 | { | |
113 | *msix_pending_byte(dev, vector) |= msix_pending_mask(vector); | |
114 | } | |
115 | ||
116 | static void msix_clr_pending(PCIDevice *dev, int vector) | |
117 | { | |
118 | *msix_pending_byte(dev, vector) &= ~msix_pending_mask(vector); | |
119 | } | |
120 | ||
121 | static bool msix_vector_masked(PCIDevice *dev, int vector, bool fmask) | |
122 | { | |
123 | unsigned offset = vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL; | |
124 | return fmask || dev->msix_table_page[offset] & PCI_MSIX_ENTRY_CTRL_MASKBIT; | |
125 | } | |
126 | ||
127 | static bool msix_is_masked(PCIDevice *dev, int vector) | |
128 | { | |
129 | return msix_vector_masked(dev, vector, dev->msix_function_masked); | |
130 | } | |
131 | ||
132 | static void msix_handle_mask_update(PCIDevice *dev, int vector, bool was_masked) | |
133 | { | |
134 | bool is_masked = msix_is_masked(dev, vector); | |
135 | if (is_masked == was_masked) { | |
136 | return; | |
137 | } | |
138 | ||
139 | if (!is_masked && msix_is_pending(dev, vector)) { | |
140 | msix_clr_pending(dev, vector); | |
141 | msix_notify(dev, vector); | |
142 | } | |
143 | } | |
144 | ||
145 | static void msix_update_function_masked(PCIDevice *dev) | |
146 | { | |
147 | dev->msix_function_masked = !msix_enabled(dev) || | |
148 | (dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] & MSIX_MASKALL_MASK); | |
149 | } | |
150 | ||
151 | /* Handle MSI-X capability config write. */ | |
152 | void msix_write_config(PCIDevice *dev, uint32_t addr, | |
153 | uint32_t val, int len) | |
154 | { | |
155 | unsigned enable_pos = dev->msix_cap + MSIX_CONTROL_OFFSET; | |
156 | int vector; | |
157 | bool was_masked; | |
158 | ||
159 | if (!range_covers_byte(addr, len, enable_pos)) { | |
160 | return; | |
161 | } | |
162 | ||
163 | was_masked = dev->msix_function_masked; | |
164 | msix_update_function_masked(dev); | |
165 | ||
166 | if (!msix_enabled(dev)) { | |
167 | return; | |
168 | } | |
169 | ||
170 | pci_device_deassert_intx(dev); | |
171 | ||
172 | if (dev->msix_function_masked == was_masked) { | |
173 | return; | |
174 | } | |
175 | ||
176 | for (vector = 0; vector < dev->msix_entries_nr; ++vector) { | |
177 | msix_handle_mask_update(dev, vector, | |
178 | msix_vector_masked(dev, vector, was_masked)); | |
179 | } | |
180 | } | |
181 | ||
182 | static void msix_mmio_write(void *opaque, target_phys_addr_t addr, | |
183 | uint64_t val, unsigned size) | |
184 | { | |
185 | PCIDevice *dev = opaque; | |
186 | unsigned int offset = addr & (MSIX_PAGE_SIZE - 1) & ~0x3; | |
187 | int vector = offset / PCI_MSIX_ENTRY_SIZE; | |
188 | bool was_masked; | |
189 | ||
190 | /* MSI-X page includes a read-only PBA and a writeable Vector Control. */ | |
191 | if (vector >= dev->msix_entries_nr) { | |
192 | return; | |
193 | } | |
194 | ||
195 | was_masked = msix_is_masked(dev, vector); | |
196 | pci_set_long(dev->msix_table_page + offset, val); | |
197 | msix_handle_mask_update(dev, vector, was_masked); | |
198 | } | |
199 | ||
/* Access callbacks for the "msix" I/O region set up in msix_init().
 * Only accesses of exactly 4 bytes are declared valid. */
static const MemoryRegionOps msix_mmio_ops = {
    .read = msix_mmio_read,
    .write = msix_mmio_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
    .valid = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
};
209 | ||
210 | static void msix_mmio_setup(PCIDevice *d, MemoryRegion *bar) | |
211 | { | |
212 | uint8_t *config = d->config + d->msix_cap; | |
213 | uint32_t table = pci_get_long(config + PCI_MSIX_TABLE); | |
214 | uint32_t offset = table & ~(MSIX_PAGE_SIZE - 1); | |
215 | /* TODO: for assigned devices, we'll want to make it possible to map | |
216 | * pending bits separately in case they are in a separate bar. */ | |
217 | ||
218 | memory_region_add_subregion(bar, offset, &d->msix_mmio); | |
219 | } | |
220 | ||
221 | static void msix_mask_all(struct PCIDevice *dev, unsigned nentries) | |
222 | { | |
223 | int vector; | |
224 | for (vector = 0; vector < nentries; ++vector) { | |
225 | unsigned offset = | |
226 | vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL; | |
227 | dev->msix_table_page[offset] |= PCI_MSIX_ENTRY_CTRL_MASKBIT; | |
228 | } | |
229 | } | |
230 | ||
231 | /* Initialize the MSI-X structures. Note: if MSI-X is supported, BAR size is | |
232 | * modified, it should be retrieved with msix_bar_size. */ | |
233 | int msix_init(struct PCIDevice *dev, unsigned short nentries, | |
234 | MemoryRegion *bar, | |
235 | unsigned bar_nr, unsigned bar_size) | |
236 | { | |
237 | int ret; | |
238 | /* Nothing to do if MSI is not supported by interrupt controller */ | |
239 | if (!msix_supported) | |
240 | return -ENOTSUP; | |
241 | ||
242 | if (nentries > MSIX_MAX_ENTRIES) | |
243 | return -EINVAL; | |
244 | ||
245 | dev->msix_entry_used = g_malloc0(MSIX_MAX_ENTRIES * | |
246 | sizeof *dev->msix_entry_used); | |
247 | ||
248 | dev->msix_table_page = g_malloc0(MSIX_PAGE_SIZE); | |
249 | msix_mask_all(dev, nentries); | |
250 | ||
251 | memory_region_init_io(&dev->msix_mmio, &msix_mmio_ops, dev, | |
252 | "msix", MSIX_PAGE_SIZE); | |
253 | ||
254 | dev->msix_entries_nr = nentries; | |
255 | ret = msix_add_config(dev, nentries, bar_nr, bar_size); | |
256 | if (ret) | |
257 | goto err_config; | |
258 | ||
259 | dev->cap_present |= QEMU_PCI_CAP_MSIX; | |
260 | msix_mmio_setup(dev, bar); | |
261 | return 0; | |
262 | ||
263 | err_config: | |
264 | dev->msix_entries_nr = 0; | |
265 | memory_region_destroy(&dev->msix_mmio); | |
266 | g_free(dev->msix_table_page); | |
267 | dev->msix_table_page = NULL; | |
268 | g_free(dev->msix_entry_used); | |
269 | dev->msix_entry_used = NULL; | |
270 | return ret; | |
271 | } | |
272 | ||
273 | static void msix_free_irq_entries(PCIDevice *dev) | |
274 | { | |
275 | int vector; | |
276 | ||
277 | for (vector = 0; vector < dev->msix_entries_nr; ++vector) { | |
278 | dev->msix_entry_used[vector] = 0; | |
279 | msix_clr_pending(dev, vector); | |
280 | } | |
281 | } | |
282 | ||
283 | /* Clean up resources for the device. */ | |
284 | int msix_uninit(PCIDevice *dev, MemoryRegion *bar) | |
285 | { | |
286 | if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) | |
287 | return 0; | |
288 | pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH); | |
289 | dev->msix_cap = 0; | |
290 | msix_free_irq_entries(dev); | |
291 | dev->msix_entries_nr = 0; | |
292 | memory_region_del_subregion(bar, &dev->msix_mmio); | |
293 | memory_region_destroy(&dev->msix_mmio); | |
294 | g_free(dev->msix_table_page); | |
295 | dev->msix_table_page = NULL; | |
296 | g_free(dev->msix_entry_used); | |
297 | dev->msix_entry_used = NULL; | |
298 | dev->cap_present &= ~QEMU_PCI_CAP_MSIX; | |
299 | return 0; | |
300 | } | |
301 | ||
302 | void msix_save(PCIDevice *dev, QEMUFile *f) | |
303 | { | |
304 | unsigned n = dev->msix_entries_nr; | |
305 | ||
306 | if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) { | |
307 | return; | |
308 | } | |
309 | ||
310 | qemu_put_buffer(f, dev->msix_table_page, n * PCI_MSIX_ENTRY_SIZE); | |
311 | qemu_put_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING, (n + 7) / 8); | |
312 | } | |
313 | ||
314 | /* Should be called after restoring the config space. */ | |
315 | void msix_load(PCIDevice *dev, QEMUFile *f) | |
316 | { | |
317 | unsigned n = dev->msix_entries_nr; | |
318 | ||
319 | if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) { | |
320 | return; | |
321 | } | |
322 | ||
323 | msix_free_irq_entries(dev); | |
324 | qemu_get_buffer(f, dev->msix_table_page, n * PCI_MSIX_ENTRY_SIZE); | |
325 | qemu_get_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING, (n + 7) / 8); | |
326 | msix_update_function_masked(dev); | |
327 | } | |
328 | ||
329 | /* Does device support MSI-X? */ | |
330 | int msix_present(PCIDevice *dev) | |
331 | { | |
332 | return dev->cap_present & QEMU_PCI_CAP_MSIX; | |
333 | } | |
334 | ||
335 | /* Is MSI-X enabled? */ | |
336 | int msix_enabled(PCIDevice *dev) | |
337 | { | |
338 | return (dev->cap_present & QEMU_PCI_CAP_MSIX) && | |
339 | (dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] & | |
340 | MSIX_ENABLE_MASK); | |
341 | } | |
342 | ||
343 | /* Size of bar where MSI-X table resides, or 0 if MSI-X not supported. */ | |
344 | uint32_t msix_bar_size(PCIDevice *dev) | |
345 | { | |
346 | return (dev->cap_present & QEMU_PCI_CAP_MSIX) ? | |
347 | dev->msix_bar_size : 0; | |
348 | } | |
349 | ||
350 | /* Send an MSI-X message */ | |
351 | void msix_notify(PCIDevice *dev, unsigned vector) | |
352 | { | |
353 | uint8_t *table_entry = dev->msix_table_page + vector * PCI_MSIX_ENTRY_SIZE; | |
354 | uint64_t address; | |
355 | uint32_t data; | |
356 | ||
357 | if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) | |
358 | return; | |
359 | if (msix_is_masked(dev, vector)) { | |
360 | msix_set_pending(dev, vector); | |
361 | return; | |
362 | } | |
363 | ||
364 | address = pci_get_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR); | |
365 | data = pci_get_long(table_entry + PCI_MSIX_ENTRY_DATA); | |
366 | stl_le_phys(address, data); | |
367 | } | |
368 | ||
369 | void msix_reset(PCIDevice *dev) | |
370 | { | |
371 | if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) | |
372 | return; | |
373 | msix_free_irq_entries(dev); | |
374 | dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &= | |
375 | ~dev->wmask[dev->msix_cap + MSIX_CONTROL_OFFSET]; | |
376 | memset(dev->msix_table_page, 0, MSIX_PAGE_SIZE); | |
377 | msix_mask_all(dev, dev->msix_entries_nr); | |
378 | } | |
379 | ||
/* The PCI spec suggests that devices make it possible for software to
 * configure fewer vectors than the device supports, but it does not specify
 * a standard mechanism for doing so.
 *
 * We support this by asking devices to declare the vectors that software is
 * actually going to use, and by checking this on the notification path.
 * Devices that don't want to follow the spec's suggestion can declare all
 * vectors as used. */
387 | ||
388 | /* Mark vector as used. */ | |
389 | int msix_vector_use(PCIDevice *dev, unsigned vector) | |
390 | { | |
391 | if (vector >= dev->msix_entries_nr) | |
392 | return -EINVAL; | |
393 | dev->msix_entry_used[vector]++; | |
394 | return 0; | |
395 | } | |
396 | ||
397 | /* Mark vector as unused. */ | |
398 | void msix_vector_unuse(PCIDevice *dev, unsigned vector) | |
399 | { | |
400 | if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) { | |
401 | return; | |
402 | } | |
403 | if (--dev->msix_entry_used[vector]) { | |
404 | return; | |
405 | } | |
406 | msix_clr_pending(dev, vector); | |
407 | } | |
408 | ||
409 | void msix_unuse_all_vectors(PCIDevice *dev) | |
410 | { | |
411 | if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) | |
412 | return; | |
413 | msix_free_irq_entries(dev); | |
414 | } |