]>
Commit | Line | Data |
---|---|---|
02eb84d0 MT |
1 | /* |
2 | * MSI-X device support | |
3 | * | |
4 | * This module includes support for MSI-X in pci devices. | |
5 | * | |
6 | * Author: Michael S. Tsirkin <mst@redhat.com> | |
7 | * | |
8 | * Copyright (c) 2009, Red Hat Inc, Michael S. Tsirkin (mst@redhat.com) | |
9 | * | |
10 | * This work is licensed under the terms of the GNU GPL, version 2. See | |
11 | * the COPYING file in the top-level directory. | |
12 | */ | |
13 | ||
14 | #include "hw.h" | |
15 | #include "msix.h" | |
16 | #include "pci.h" | |
bf1b0071 | 17 | #include "range.h" |
02eb84d0 | 18 | |
02eb84d0 MT |
/* Byte offsets of the fields inside the MSI-X capability structure,
 * and the total length of that structure. */
#define MSIX_TABLE_OFFSET 4
#define MSIX_PBA_OFFSET 8
#define MSIX_CAP_LENGTH 12

/* The MSI-X enable bit and the mask-all bit both live in byte 1 of the
 * FLAGS register, so the 16-bit flag values are shifted down by 8 to get
 * masks usable on that single byte. */
#define MSIX_CONTROL_OFFSET (PCI_MSIX_FLAGS + 1)
#define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
#define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8)

/* MSI-X table format: byte offsets inside one table entry, the size of an
 * entry, and the per-vector mask bit in the vector control field. */
#define MSIX_MSG_ADDR 0
#define MSIX_MSG_UPPER_ADDR 4
#define MSIX_MSG_DATA 8
#define MSIX_VECTOR_CTRL 12
#define MSIX_ENTRY_SIZE 16
#define MSIX_VECTOR_MASK 0x1

/* How much space does an MSI-X table need. */
/* The spec requires giving the table structure
 * a 4K aligned region all by itself. */
#define MSIX_PAGE_SIZE 0x1000
/* The second half of the page is reserved for the pending bits. */
#define MSIX_PAGE_PENDING (MSIX_PAGE_SIZE / 2)
/* Largest number of vectors msix_init() accepts. */
#define MSIX_MAX_ENTRIES 32
44 | ||
45 | ||
02eb84d0 MT |
/* Flag set by the interrupt controller to declare MSI-X support.
 * While it is zero, msix_init() refuses to set up MSI-X. */
int msix_supported;
48 | ||
49 | /* Add MSI-X capability to the config space for the device. */ | |
50 | /* Given a bar and its size, add MSI-X table on top of it | |
51 | * and fill MSI-X capability in the config space. | |
52 | * Original bar size must be a power of 2 or 0. | |
53 | * New bar size is returned. */ | |
54 | static int msix_add_config(struct PCIDevice *pdev, unsigned short nentries, | |
55 | unsigned bar_nr, unsigned bar_size) | |
56 | { | |
57 | int config_offset; | |
58 | uint8_t *config; | |
59 | uint32_t new_size; | |
60 | ||
61 | if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1) | |
62 | return -EINVAL; | |
63 | if (bar_size > 0x80000000) | |
64 | return -ENOSPC; | |
65 | ||
66 | /* Add space for MSI-X structures */ | |
5e520a7d | 67 | if (!bar_size) { |
5a1fc5e8 MT |
68 | new_size = MSIX_PAGE_SIZE; |
69 | } else if (bar_size < MSIX_PAGE_SIZE) { | |
70 | bar_size = MSIX_PAGE_SIZE; | |
71 | new_size = MSIX_PAGE_SIZE * 2; | |
72 | } else { | |
02eb84d0 | 73 | new_size = bar_size * 2; |
5a1fc5e8 | 74 | } |
02eb84d0 MT |
75 | |
76 | pdev->msix_bar_size = new_size; | |
ca77089d IY |
77 | config_offset = pci_add_capability(pdev, PCI_CAP_ID_MSIX, |
78 | 0, MSIX_CAP_LENGTH); | |
02eb84d0 MT |
79 | if (config_offset < 0) |
80 | return config_offset; | |
81 | config = pdev->config + config_offset; | |
82 | ||
83 | pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1); | |
84 | /* Table on top of BAR */ | |
85 | pci_set_long(config + MSIX_TABLE_OFFSET, bar_size | bar_nr); | |
86 | /* Pending bits on top of that */ | |
5a1fc5e8 MT |
87 | pci_set_long(config + MSIX_PBA_OFFSET, (bar_size + MSIX_PAGE_PENDING) | |
88 | bar_nr); | |
02eb84d0 MT |
89 | pdev->msix_cap = config_offset; |
90 | /* Make flags bit writeable. */ | |
5b5cb086 MT |
91 | pdev->wmask[config_offset + MSIX_CONTROL_OFFSET] |= MSIX_ENABLE_MASK | |
92 | MSIX_MASKALL_MASK; | |
02eb84d0 MT |
93 | return 0; |
94 | } | |
95 | ||
c227f099 | 96 | static uint32_t msix_mmio_readl(void *opaque, target_phys_addr_t addr) |
02eb84d0 MT |
97 | { |
98 | PCIDevice *dev = opaque; | |
76f5159d | 99 | unsigned int offset = addr & (MSIX_PAGE_SIZE - 1) & ~0x3; |
02eb84d0 | 100 | void *page = dev->msix_table_page; |
02eb84d0 | 101 | |
76f5159d | 102 | return pci_get_long(page + offset); |
02eb84d0 MT |
103 | } |
104 | ||
c227f099 | 105 | static uint32_t msix_mmio_read_unallowed(void *opaque, target_phys_addr_t addr) |
02eb84d0 MT |
106 | { |
107 | fprintf(stderr, "MSI-X: only dword read is allowed!\n"); | |
108 | return 0; | |
109 | } | |
110 | ||
/* Bit mask selecting the given vector inside its pending-bits byte
 * (eight vectors share one byte). */
static uint8_t msix_pending_mask(int vector)
{
    const int bit = vector % 8;

    return 1 << bit;
}
115 | ||
116 | static uint8_t *msix_pending_byte(PCIDevice *dev, int vector) | |
117 | { | |
5a1fc5e8 | 118 | return dev->msix_table_page + MSIX_PAGE_PENDING + vector / 8; |
02eb84d0 MT |
119 | } |
120 | ||
121 | static int msix_is_pending(PCIDevice *dev, int vector) | |
122 | { | |
123 | return *msix_pending_byte(dev, vector) & msix_pending_mask(vector); | |
124 | } | |
125 | ||
126 | static void msix_set_pending(PCIDevice *dev, int vector) | |
127 | { | |
128 | *msix_pending_byte(dev, vector) |= msix_pending_mask(vector); | |
129 | } | |
130 | ||
131 | static void msix_clr_pending(PCIDevice *dev, int vector) | |
132 | { | |
133 | *msix_pending_byte(dev, vector) &= ~msix_pending_mask(vector); | |
134 | } | |
135 | ||
5b5cb086 MT |
136 | static int msix_function_masked(PCIDevice *dev) |
137 | { | |
138 | return dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] & MSIX_MASKALL_MASK; | |
139 | } | |
140 | ||
02eb84d0 MT |
141 | static int msix_is_masked(PCIDevice *dev, int vector) |
142 | { | |
143 | unsigned offset = vector * MSIX_ENTRY_SIZE + MSIX_VECTOR_CTRL; | |
5b5cb086 MT |
144 | return msix_function_masked(dev) || |
145 | dev->msix_table_page[offset] & MSIX_VECTOR_MASK; | |
146 | } | |
147 | ||
148 | static void msix_handle_mask_update(PCIDevice *dev, int vector) | |
149 | { | |
150 | if (!msix_is_masked(dev, vector) && msix_is_pending(dev, vector)) { | |
151 | msix_clr_pending(dev, vector); | |
152 | msix_notify(dev, vector); | |
153 | } | |
154 | } | |
155 | ||
156 | /* Handle MSI-X capability config write. */ | |
157 | void msix_write_config(PCIDevice *dev, uint32_t addr, | |
158 | uint32_t val, int len) | |
159 | { | |
160 | unsigned enable_pos = dev->msix_cap + MSIX_CONTROL_OFFSET; | |
161 | int vector; | |
57c6db2e | 162 | int i; |
5b5cb086 | 163 | |
98a3cb02 | 164 | if (!range_covers_byte(addr, len, enable_pos)) { |
5b5cb086 MT |
165 | return; |
166 | } | |
167 | ||
168 | if (!msix_enabled(dev)) { | |
169 | return; | |
170 | } | |
171 | ||
57c6db2e IY |
172 | for (i = 0; i < PCI_NUM_PINS; ++i) { |
173 | qemu_set_irq(dev->irq[i], 0); | |
174 | } | |
5b5cb086 MT |
175 | |
176 | if (msix_function_masked(dev)) { | |
177 | return; | |
178 | } | |
179 | ||
180 | for (vector = 0; vector < dev->msix_entries_nr; ++vector) { | |
181 | msix_handle_mask_update(dev, vector); | |
182 | } | |
02eb84d0 MT |
183 | } |
184 | ||
c227f099 | 185 | static void msix_mmio_writel(void *opaque, target_phys_addr_t addr, |
02eb84d0 MT |
186 | uint32_t val) |
187 | { | |
188 | PCIDevice *dev = opaque; | |
76f5159d | 189 | unsigned int offset = addr & (MSIX_PAGE_SIZE - 1) & ~0x3; |
02eb84d0 | 190 | int vector = offset / MSIX_ENTRY_SIZE; |
76f5159d | 191 | pci_set_long(dev->msix_table_page + offset, val); |
5b5cb086 | 192 | msix_handle_mask_update(dev, vector); |
02eb84d0 MT |
193 | } |
194 | ||
c227f099 | 195 | static void msix_mmio_write_unallowed(void *opaque, target_phys_addr_t addr, |
02eb84d0 MT |
196 | uint32_t val) |
197 | { | |
198 | fprintf(stderr, "MSI-X: only dword write is allowed!\n"); | |
199 | } | |
200 | ||
d60efc6b | 201 | static CPUWriteMemoryFunc * const msix_mmio_write[] = { |
02eb84d0 MT |
202 | msix_mmio_write_unallowed, msix_mmio_write_unallowed, msix_mmio_writel |
203 | }; | |
204 | ||
d60efc6b | 205 | static CPUReadMemoryFunc * const msix_mmio_read[] = { |
02eb84d0 MT |
206 | msix_mmio_read_unallowed, msix_mmio_read_unallowed, msix_mmio_readl |
207 | }; | |
208 | ||
209 | /* Should be called from device's map method. */ | |
210 | void msix_mmio_map(PCIDevice *d, int region_num, | |
6e355d90 | 211 | pcibus_t addr, pcibus_t size, int type) |
02eb84d0 MT |
212 | { |
213 | uint8_t *config = d->config + d->msix_cap; | |
214 | uint32_t table = pci_get_long(config + MSIX_TABLE_OFFSET); | |
5a1fc5e8 | 215 | uint32_t offset = table & ~(MSIX_PAGE_SIZE - 1); |
02eb84d0 MT |
216 | /* TODO: for assigned devices, we'll want to make it possible to map |
217 | * pending bits separately in case they are in a separate bar. */ | |
218 | int table_bir = table & PCI_MSIX_FLAGS_BIRMASK; | |
219 | ||
220 | if (table_bir != region_num) | |
221 | return; | |
222 | if (size <= offset) | |
223 | return; | |
224 | cpu_register_physical_memory(addr + offset, size - offset, | |
225 | d->msix_mmio_index); | |
226 | } | |
227 | ||
ae1be0bb MT |
228 | static void msix_mask_all(struct PCIDevice *dev, unsigned nentries) |
229 | { | |
230 | int vector; | |
231 | for (vector = 0; vector < nentries; ++vector) { | |
232 | unsigned offset = vector * MSIX_ENTRY_SIZE + MSIX_VECTOR_CTRL; | |
233 | dev->msix_table_page[offset] |= MSIX_VECTOR_MASK; | |
234 | } | |
235 | } | |
236 | ||
02eb84d0 MT |
237 | /* Initialize the MSI-X structures. Note: if MSI-X is supported, BAR size is |
238 | * modified, it should be retrieved with msix_bar_size. */ | |
239 | int msix_init(struct PCIDevice *dev, unsigned short nentries, | |
5a1fc5e8 | 240 | unsigned bar_nr, unsigned bar_size) |
02eb84d0 MT |
241 | { |
242 | int ret; | |
243 | /* Nothing to do if MSI is not supported by interrupt controller */ | |
244 | if (!msix_supported) | |
245 | return -ENOTSUP; | |
246 | ||
247 | if (nentries > MSIX_MAX_ENTRIES) | |
248 | return -EINVAL; | |
249 | ||
250 | dev->msix_entry_used = qemu_mallocz(MSIX_MAX_ENTRIES * | |
251 | sizeof *dev->msix_entry_used); | |
252 | ||
5a1fc5e8 | 253 | dev->msix_table_page = qemu_mallocz(MSIX_PAGE_SIZE); |
ae1be0bb | 254 | msix_mask_all(dev, nentries); |
02eb84d0 MT |
255 | |
256 | dev->msix_mmio_index = cpu_register_io_memory(msix_mmio_read, | |
257 | msix_mmio_write, dev); | |
258 | if (dev->msix_mmio_index == -1) { | |
259 | ret = -EBUSY; | |
260 | goto err_index; | |
261 | } | |
262 | ||
263 | dev->msix_entries_nr = nentries; | |
264 | ret = msix_add_config(dev, nentries, bar_nr, bar_size); | |
265 | if (ret) | |
266 | goto err_config; | |
267 | ||
268 | dev->cap_present |= QEMU_PCI_CAP_MSIX; | |
269 | return 0; | |
270 | ||
271 | err_config: | |
3174ecd1 | 272 | dev->msix_entries_nr = 0; |
02eb84d0 MT |
273 | cpu_unregister_io_memory(dev->msix_mmio_index); |
274 | err_index: | |
275 | qemu_free(dev->msix_table_page); | |
276 | dev->msix_table_page = NULL; | |
277 | qemu_free(dev->msix_entry_used); | |
278 | dev->msix_entry_used = NULL; | |
279 | return ret; | |
280 | } | |
281 | ||
98304c84 MT |
282 | static void msix_free_irq_entries(PCIDevice *dev) |
283 | { | |
284 | int vector; | |
285 | ||
286 | for (vector = 0; vector < dev->msix_entries_nr; ++vector) { | |
287 | dev->msix_entry_used[vector] = 0; | |
288 | msix_clr_pending(dev, vector); | |
289 | } | |
290 | } | |
291 | ||
02eb84d0 MT |
292 | /* Clean up resources for the device. */ |
293 | int msix_uninit(PCIDevice *dev) | |
294 | { | |
295 | if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) | |
296 | return 0; | |
297 | pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH); | |
298 | dev->msix_cap = 0; | |
299 | msix_free_irq_entries(dev); | |
300 | dev->msix_entries_nr = 0; | |
301 | cpu_unregister_io_memory(dev->msix_mmio_index); | |
302 | qemu_free(dev->msix_table_page); | |
303 | dev->msix_table_page = NULL; | |
304 | qemu_free(dev->msix_entry_used); | |
305 | dev->msix_entry_used = NULL; | |
306 | dev->cap_present &= ~QEMU_PCI_CAP_MSIX; | |
307 | return 0; | |
308 | } | |
309 | ||
310 | void msix_save(PCIDevice *dev, QEMUFile *f) | |
311 | { | |
9a3e12c8 MT |
312 | unsigned n = dev->msix_entries_nr; |
313 | ||
72755a70 | 314 | if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) { |
9a3e12c8 | 315 | return; |
72755a70 | 316 | } |
9a3e12c8 MT |
317 | |
318 | qemu_put_buffer(f, dev->msix_table_page, n * MSIX_ENTRY_SIZE); | |
5a1fc5e8 | 319 | qemu_put_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING, (n + 7) / 8); |
02eb84d0 MT |
320 | } |
321 | ||
322 | /* Should be called after restoring the config space. */ | |
323 | void msix_load(PCIDevice *dev, QEMUFile *f) | |
324 | { | |
325 | unsigned n = dev->msix_entries_nr; | |
326 | ||
98846d73 | 327 | if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) { |
02eb84d0 | 328 | return; |
98846d73 | 329 | } |
02eb84d0 | 330 | |
4bfd1712 | 331 | msix_free_irq_entries(dev); |
02eb84d0 | 332 | qemu_get_buffer(f, dev->msix_table_page, n * MSIX_ENTRY_SIZE); |
5a1fc5e8 | 333 | qemu_get_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING, (n + 7) / 8); |
02eb84d0 MT |
334 | } |
335 | ||
336 | /* Does device support MSI-X? */ | |
337 | int msix_present(PCIDevice *dev) | |
338 | { | |
339 | return dev->cap_present & QEMU_PCI_CAP_MSIX; | |
340 | } | |
341 | ||
342 | /* Is MSI-X enabled? */ | |
343 | int msix_enabled(PCIDevice *dev) | |
344 | { | |
345 | return (dev->cap_present & QEMU_PCI_CAP_MSIX) && | |
2760952b | 346 | (dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] & |
02eb84d0 MT |
347 | MSIX_ENABLE_MASK); |
348 | } | |
349 | ||
350 | /* Size of bar where MSI-X table resides, or 0 if MSI-X not supported. */ | |
351 | uint32_t msix_bar_size(PCIDevice *dev) | |
352 | { | |
353 | return (dev->cap_present & QEMU_PCI_CAP_MSIX) ? | |
354 | dev->msix_bar_size : 0; | |
355 | } | |
356 | ||
357 | /* Send an MSI-X message */ | |
358 | void msix_notify(PCIDevice *dev, unsigned vector) | |
359 | { | |
360 | uint8_t *table_entry = dev->msix_table_page + vector * MSIX_ENTRY_SIZE; | |
361 | uint64_t address; | |
362 | uint32_t data; | |
363 | ||
364 | if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) | |
365 | return; | |
366 | if (msix_is_masked(dev, vector)) { | |
367 | msix_set_pending(dev, vector); | |
368 | return; | |
369 | } | |
370 | ||
371 | address = pci_get_long(table_entry + MSIX_MSG_UPPER_ADDR); | |
372 | address = (address << 32) | pci_get_long(table_entry + MSIX_MSG_ADDR); | |
373 | data = pci_get_long(table_entry + MSIX_MSG_DATA); | |
374 | stl_phys(address, data); | |
375 | } | |
376 | ||
377 | void msix_reset(PCIDevice *dev) | |
378 | { | |
379 | if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) | |
380 | return; | |
381 | msix_free_irq_entries(dev); | |
2760952b MT |
382 | dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &= |
383 | ~dev->wmask[dev->msix_cap + MSIX_CONTROL_OFFSET]; | |
5a1fc5e8 | 384 | memset(dev->msix_table_page, 0, MSIX_PAGE_SIZE); |
ae1be0bb | 385 | msix_mask_all(dev, dev->msix_entries_nr); |
02eb84d0 MT |
386 | } |
387 | ||
388 | /* PCI spec suggests that devices make it possible for software to configure | |
389 | * fewer vectors than supported by the device, but does not specify a standard | |
390 | * mechanism for devices to do so. | |
391 | * | |
392 | * We support this by asking devices to declare vectors software is going to | |
393 | * actually use, and checking this on the notification path. Devices that | |
394 | * don't want to follow the spec suggestion can declare all vectors as used. */ | |
395 | ||
396 | /* Mark vector as used. */ | |
397 | int msix_vector_use(PCIDevice *dev, unsigned vector) | |
398 | { | |
399 | if (vector >= dev->msix_entries_nr) | |
400 | return -EINVAL; | |
401 | dev->msix_entry_used[vector]++; | |
402 | return 0; | |
403 | } | |
404 | ||
405 | /* Mark vector as unused. */ | |
406 | void msix_vector_unuse(PCIDevice *dev, unsigned vector) | |
407 | { | |
98304c84 MT |
408 | if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) { |
409 | return; | |
410 | } | |
411 | if (--dev->msix_entry_used[vector]) { | |
412 | return; | |
413 | } | |
414 | msix_clr_pending(dev, vector); | |
02eb84d0 | 415 | } |
b5f28bca MT |
416 | |
417 | void msix_unuse_all_vectors(PCIDevice *dev) | |
418 | { | |
419 | if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) | |
420 | return; | |
421 | msix_free_irq_entries(dev); | |
422 | } |