]>
Commit | Line | Data |
---|---|---|
02eb84d0 MT |
1 | /* |
2 | * MSI-X device support | |
3 | * | |
4 | * This module includes support for MSI-X in pci devices. | |
5 | * | |
6 | * Author: Michael S. Tsirkin <mst@redhat.com> | |
7 | * | |
8 | * Copyright (c) 2009, Red Hat Inc, Michael S. Tsirkin (mst@redhat.com) | |
9 | * | |
10 | * This work is licensed under the terms of the GNU GPL, version 2. See | |
11 | * the COPYING file in the top-level directory. | |
12 | */ | |
13 | ||
14 | #include "hw.h" | |
15 | #include "msix.h" | |
16 | #include "pci.h" | |
17 | ||
/* PCI capability constants, as declared in linux/pci_regs.h */
#define PCI_CAP_ID_MSIX         0x11        /* MSI-X capability ID */
#define PCI_MSIX_FLAGS          2           /* Flags register; table size in its lower 11 bits */
#define PCI_MSIX_FLAGS_QSIZE    0x7FF
#define PCI_MSIX_FLAGS_ENABLE   (1 << 15)
#define PCI_MSIX_FLAGS_BIRMASK  (7 << 0)

/* Byte offsets inside the MSI-X capability structure, and its total length */
#define MSIX_TABLE_OFFSET       4
#define MSIX_PBA_OFFSET         8
#define MSIX_CAP_LENGTH         12

/* The enable bit lives in byte 1 of the FLAGS register, so it is addressed
 * as a single config byte plus a mask within that byte. */
#define MSIX_ENABLE_OFFSET      (PCI_MSIX_FLAGS + 1)
#define MSIX_ENABLE_MASK        (PCI_MSIX_FLAGS_ENABLE >> 8)

/* Layout of one MSI-X table entry (byte offsets), its size, and the mask bit
 * inside the vector control field */
#define MSIX_MSG_ADDR           0
#define MSIX_MSG_UPPER_ADDR     4
#define MSIX_MSG_DATA           8
#define MSIX_VECTOR_CTRL        12
#define MSIX_ENTRY_SIZE         16
#define MSIX_VECTOR_MASK        0x1

/* Upper bound on the number of vectors accepted by msix_init() */
#define MSIX_MAX_ENTRIES        32
42 | ||
43 | ||
/* Debug tracing. When MSIX_DEBUG is defined, DEBUG() prints its formatted
 * message to stderr prefixed with the name of the calling function;
 * otherwise it expands to an empty statement. */
#ifndef MSIX_DEBUG
#define DEBUG(fmt, ...) do { } while(0)
#else
#define DEBUG(fmt, ...)                                       \
    do {                                                      \
        fprintf(stderr, "%s: " fmt, __func__ , __VA_ARGS__);  \
    } while (0)
#endif

/* Set by the interrupt controller to declare MSI-X support. msix_init()
 * returns -ENOTSUP while this is zero. */
int msix_supported;
55 | ||
5e520a7d BS |
56 | /* Reserve second half of the page for pending bits */ |
57 | static int msix_page_pending(PCIDevice *d) | |
58 | { | |
59 | return (d->msix_page_size / 2); | |
60 | } | |
61 | ||
02eb84d0 MT |
62 | /* Add MSI-X capability to the config space for the device. */ |
63 | /* Given a bar and its size, add MSI-X table on top of it | |
64 | * and fill MSI-X capability in the config space. | |
65 | * Original bar size must be a power of 2 or 0. | |
66 | * New bar size is returned. */ | |
67 | static int msix_add_config(struct PCIDevice *pdev, unsigned short nentries, | |
68 | unsigned bar_nr, unsigned bar_size) | |
69 | { | |
70 | int config_offset; | |
71 | uint8_t *config; | |
72 | uint32_t new_size; | |
73 | ||
74 | if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1) | |
75 | return -EINVAL; | |
76 | if (bar_size > 0x80000000) | |
77 | return -ENOSPC; | |
78 | ||
79 | /* Add space for MSI-X structures */ | |
5e520a7d BS |
80 | if (!bar_size) { |
81 | new_size = pdev->msix_page_size; | |
82 | } else if (bar_size < pdev->msix_page_size) { | |
83 | bar_size = pdev->msix_page_size; | |
84 | new_size = pdev->msix_page_size * 2; | |
02eb84d0 MT |
85 | } else |
86 | new_size = bar_size * 2; | |
87 | ||
88 | pdev->msix_bar_size = new_size; | |
89 | config_offset = pci_add_capability(pdev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH); | |
90 | if (config_offset < 0) | |
91 | return config_offset; | |
92 | config = pdev->config + config_offset; | |
93 | ||
94 | pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1); | |
95 | /* Table on top of BAR */ | |
96 | pci_set_long(config + MSIX_TABLE_OFFSET, bar_size | bar_nr); | |
97 | /* Pending bits on top of that */ | |
5e520a7d BS |
98 | pci_set_long(config + MSIX_PBA_OFFSET, (bar_size + msix_page_pending(pdev)) |
99 | | bar_nr); | |
02eb84d0 MT |
100 | pdev->msix_cap = config_offset; |
101 | /* Make flags bit writeable. */ | |
102 | pdev->wmask[config_offset + MSIX_ENABLE_OFFSET] |= MSIX_ENABLE_MASK; | |
103 | return 0; | |
104 | } | |
105 | ||
106 | static void msix_free_irq_entries(PCIDevice *dev) | |
107 | { | |
108 | int vector; | |
109 | ||
110 | for (vector = 0; vector < dev->msix_entries_nr; ++vector) | |
111 | dev->msix_entry_used[vector] = 0; | |
112 | } | |
113 | ||
114 | /* Handle MSI-X capability config write. */ | |
115 | void msix_write_config(PCIDevice *dev, uint32_t addr, | |
116 | uint32_t val, int len) | |
117 | { | |
118 | unsigned enable_pos = dev->msix_cap + MSIX_ENABLE_OFFSET; | |
119 | if (addr + len <= enable_pos || addr > enable_pos) | |
120 | return; | |
121 | ||
122 | if (msix_enabled(dev)) | |
123 | qemu_set_irq(dev->irq[0], 0); | |
124 | } | |
125 | ||
c227f099 | 126 | static uint32_t msix_mmio_readl(void *opaque, target_phys_addr_t addr) |
02eb84d0 MT |
127 | { |
128 | PCIDevice *dev = opaque; | |
5e520a7d | 129 | unsigned int offset = addr & (dev->msix_page_size - 1); |
02eb84d0 MT |
130 | void *page = dev->msix_table_page; |
131 | uint32_t val = 0; | |
132 | ||
133 | memcpy(&val, (void *)((char *)page + offset), 4); | |
134 | ||
135 | return val; | |
136 | } | |
137 | ||
c227f099 | 138 | static uint32_t msix_mmio_read_unallowed(void *opaque, target_phys_addr_t addr) |
02eb84d0 MT |
139 | { |
140 | fprintf(stderr, "MSI-X: only dword read is allowed!\n"); | |
141 | return 0; | |
142 | } | |
143 | ||
/* Mask selecting the bit of a vector inside its pending byte; eight vectors
 * share one byte. */
static uint8_t msix_pending_mask(int vector)
{
    int bit = vector % 8;

    return 1 << bit;
}
148 | ||
149 | static uint8_t *msix_pending_byte(PCIDevice *dev, int vector) | |
150 | { | |
5e520a7d | 151 | return dev->msix_table_page + msix_page_pending(dev) + vector / 8; |
02eb84d0 MT |
152 | } |
153 | ||
154 | static int msix_is_pending(PCIDevice *dev, int vector) | |
155 | { | |
156 | return *msix_pending_byte(dev, vector) & msix_pending_mask(vector); | |
157 | } | |
158 | ||
159 | static void msix_set_pending(PCIDevice *dev, int vector) | |
160 | { | |
161 | *msix_pending_byte(dev, vector) |= msix_pending_mask(vector); | |
162 | } | |
163 | ||
164 | static void msix_clr_pending(PCIDevice *dev, int vector) | |
165 | { | |
166 | *msix_pending_byte(dev, vector) &= ~msix_pending_mask(vector); | |
167 | } | |
168 | ||
169 | static int msix_is_masked(PCIDevice *dev, int vector) | |
170 | { | |
171 | unsigned offset = vector * MSIX_ENTRY_SIZE + MSIX_VECTOR_CTRL; | |
172 | return dev->msix_table_page[offset] & MSIX_VECTOR_MASK; | |
173 | } | |
174 | ||
c227f099 | 175 | static void msix_mmio_writel(void *opaque, target_phys_addr_t addr, |
02eb84d0 MT |
176 | uint32_t val) |
177 | { | |
178 | PCIDevice *dev = opaque; | |
5e520a7d | 179 | unsigned int offset = addr & (dev->msix_page_size - 1); |
02eb84d0 MT |
180 | int vector = offset / MSIX_ENTRY_SIZE; |
181 | memcpy(dev->msix_table_page + offset, &val, 4); | |
182 | if (!msix_is_masked(dev, vector) && msix_is_pending(dev, vector)) { | |
183 | msix_clr_pending(dev, vector); | |
184 | msix_notify(dev, vector); | |
185 | } | |
186 | } | |
187 | ||
c227f099 | 188 | static void msix_mmio_write_unallowed(void *opaque, target_phys_addr_t addr, |
02eb84d0 MT |
189 | uint32_t val) |
190 | { | |
191 | fprintf(stderr, "MSI-X: only dword write is allowed!\n"); | |
192 | } | |
193 | ||
d60efc6b | 194 | static CPUWriteMemoryFunc * const msix_mmio_write[] = { |
02eb84d0 MT |
195 | msix_mmio_write_unallowed, msix_mmio_write_unallowed, msix_mmio_writel |
196 | }; | |
197 | ||
d60efc6b | 198 | static CPUReadMemoryFunc * const msix_mmio_read[] = { |
02eb84d0 MT |
199 | msix_mmio_read_unallowed, msix_mmio_read_unallowed, msix_mmio_readl |
200 | }; | |
201 | ||
202 | /* Should be called from device's map method. */ | |
203 | void msix_mmio_map(PCIDevice *d, int region_num, | |
204 | uint32_t addr, uint32_t size, int type) | |
205 | { | |
206 | uint8_t *config = d->config + d->msix_cap; | |
207 | uint32_t table = pci_get_long(config + MSIX_TABLE_OFFSET); | |
5e520a7d | 208 | uint32_t offset = table & ~(d->msix_page_size - 1); |
02eb84d0 MT |
209 | /* TODO: for assigned devices, we'll want to make it possible to map |
210 | * pending bits separately in case they are in a separate bar. */ | |
211 | int table_bir = table & PCI_MSIX_FLAGS_BIRMASK; | |
212 | ||
213 | if (table_bir != region_num) | |
214 | return; | |
215 | if (size <= offset) | |
216 | return; | |
217 | cpu_register_physical_memory(addr + offset, size - offset, | |
218 | d->msix_mmio_index); | |
219 | } | |
220 | ||
221 | /* Initialize the MSI-X structures. Note: if MSI-X is supported, BAR size is | |
222 | * modified, it should be retrieved with msix_bar_size. */ | |
223 | int msix_init(struct PCIDevice *dev, unsigned short nentries, | |
c227f099 | 224 | unsigned bar_nr, unsigned bar_size, target_phys_addr_t page_size) |
02eb84d0 MT |
225 | { |
226 | int ret; | |
227 | /* Nothing to do if MSI is not supported by interrupt controller */ | |
228 | if (!msix_supported) | |
229 | return -ENOTSUP; | |
230 | ||
231 | if (nentries > MSIX_MAX_ENTRIES) | |
232 | return -EINVAL; | |
233 | ||
234 | dev->msix_entry_used = qemu_mallocz(MSIX_MAX_ENTRIES * | |
235 | sizeof *dev->msix_entry_used); | |
236 | ||
5e520a7d BS |
237 | dev->msix_page_size = page_size; |
238 | dev->msix_table_page = qemu_mallocz(dev->msix_page_size); | |
02eb84d0 MT |
239 | |
240 | dev->msix_mmio_index = cpu_register_io_memory(msix_mmio_read, | |
241 | msix_mmio_write, dev); | |
242 | if (dev->msix_mmio_index == -1) { | |
243 | ret = -EBUSY; | |
244 | goto err_index; | |
245 | } | |
246 | ||
247 | dev->msix_entries_nr = nentries; | |
248 | ret = msix_add_config(dev, nentries, bar_nr, bar_size); | |
249 | if (ret) | |
250 | goto err_config; | |
251 | ||
252 | dev->cap_present |= QEMU_PCI_CAP_MSIX; | |
253 | return 0; | |
254 | ||
255 | err_config: | |
3174ecd1 | 256 | dev->msix_entries_nr = 0; |
02eb84d0 MT |
257 | cpu_unregister_io_memory(dev->msix_mmio_index); |
258 | err_index: | |
259 | qemu_free(dev->msix_table_page); | |
260 | dev->msix_table_page = NULL; | |
261 | qemu_free(dev->msix_entry_used); | |
262 | dev->msix_entry_used = NULL; | |
263 | return ret; | |
264 | } | |
265 | ||
266 | /* Clean up resources for the device. */ | |
267 | int msix_uninit(PCIDevice *dev) | |
268 | { | |
269 | if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) | |
270 | return 0; | |
271 | pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH); | |
272 | dev->msix_cap = 0; | |
273 | msix_free_irq_entries(dev); | |
274 | dev->msix_entries_nr = 0; | |
275 | cpu_unregister_io_memory(dev->msix_mmio_index); | |
276 | qemu_free(dev->msix_table_page); | |
277 | dev->msix_table_page = NULL; | |
278 | qemu_free(dev->msix_entry_used); | |
279 | dev->msix_entry_used = NULL; | |
280 | dev->cap_present &= ~QEMU_PCI_CAP_MSIX; | |
281 | return 0; | |
282 | } | |
283 | ||
284 | void msix_save(PCIDevice *dev, QEMUFile *f) | |
285 | { | |
9a3e12c8 MT |
286 | unsigned n = dev->msix_entries_nr; |
287 | ||
72755a70 | 288 | if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) { |
9a3e12c8 | 289 | return; |
72755a70 | 290 | } |
9a3e12c8 MT |
291 | |
292 | qemu_put_buffer(f, dev->msix_table_page, n * MSIX_ENTRY_SIZE); | |
5e520a7d BS |
293 | qemu_put_buffer(f, dev->msix_table_page + msix_page_pending(dev), |
294 | (n + 7) / 8); | |
02eb84d0 MT |
295 | } |
296 | ||
297 | /* Should be called after restoring the config space. */ | |
298 | void msix_load(PCIDevice *dev, QEMUFile *f) | |
299 | { | |
300 | unsigned n = dev->msix_entries_nr; | |
301 | ||
98846d73 | 302 | if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) { |
02eb84d0 | 303 | return; |
98846d73 | 304 | } |
02eb84d0 | 305 | |
4bfd1712 | 306 | msix_free_irq_entries(dev); |
02eb84d0 | 307 | qemu_get_buffer(f, dev->msix_table_page, n * MSIX_ENTRY_SIZE); |
5e520a7d BS |
308 | qemu_get_buffer(f, dev->msix_table_page + msix_page_pending(dev), |
309 | (n + 7) / 8); | |
02eb84d0 MT |
310 | } |
311 | ||
312 | /* Does device support MSI-X? */ | |
313 | int msix_present(PCIDevice *dev) | |
314 | { | |
315 | return dev->cap_present & QEMU_PCI_CAP_MSIX; | |
316 | } | |
317 | ||
318 | /* Is MSI-X enabled? */ | |
319 | int msix_enabled(PCIDevice *dev) | |
320 | { | |
321 | return (dev->cap_present & QEMU_PCI_CAP_MSIX) && | |
322 | (dev->config[dev->msix_cap + MSIX_ENABLE_OFFSET] & | |
323 | MSIX_ENABLE_MASK); | |
324 | } | |
325 | ||
326 | /* Size of bar where MSI-X table resides, or 0 if MSI-X not supported. */ | |
327 | uint32_t msix_bar_size(PCIDevice *dev) | |
328 | { | |
329 | return (dev->cap_present & QEMU_PCI_CAP_MSIX) ? | |
330 | dev->msix_bar_size : 0; | |
331 | } | |
332 | ||
333 | /* Send an MSI-X message */ | |
334 | void msix_notify(PCIDevice *dev, unsigned vector) | |
335 | { | |
336 | uint8_t *table_entry = dev->msix_table_page + vector * MSIX_ENTRY_SIZE; | |
337 | uint64_t address; | |
338 | uint32_t data; | |
339 | ||
340 | if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) | |
341 | return; | |
342 | if (msix_is_masked(dev, vector)) { | |
343 | msix_set_pending(dev, vector); | |
344 | return; | |
345 | } | |
346 | ||
347 | address = pci_get_long(table_entry + MSIX_MSG_UPPER_ADDR); | |
348 | address = (address << 32) | pci_get_long(table_entry + MSIX_MSG_ADDR); | |
349 | data = pci_get_long(table_entry + MSIX_MSG_DATA); | |
350 | stl_phys(address, data); | |
351 | } | |
352 | ||
353 | void msix_reset(PCIDevice *dev) | |
354 | { | |
355 | if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) | |
356 | return; | |
357 | msix_free_irq_entries(dev); | |
358 | dev->config[dev->msix_cap + MSIX_ENABLE_OFFSET] &= MSIX_ENABLE_MASK; | |
5e520a7d | 359 | memset(dev->msix_table_page, 0, dev->msix_page_size); |
02eb84d0 MT |
360 | } |
361 | ||
362 | /* PCI spec suggests that devices make it possible for software to configure | |
363 | * fewer vectors than supported by the device, but does not specify a standard | |
364 | * mechanism for devices to do so. | |
365 | * | |
366 | * We support this by asking devices to declare vectors software is going to | |
367 | * actually use, and checking this on the notification path. Devices that | |
368 | * don't want to follow the spec suggestion can declare all vectors as used. */ | |
369 | ||
370 | /* Mark vector as used. */ | |
371 | int msix_vector_use(PCIDevice *dev, unsigned vector) | |
372 | { | |
373 | if (vector >= dev->msix_entries_nr) | |
374 | return -EINVAL; | |
375 | dev->msix_entry_used[vector]++; | |
376 | return 0; | |
377 | } | |
378 | ||
379 | /* Mark vector as unused. */ | |
380 | void msix_vector_unuse(PCIDevice *dev, unsigned vector) | |
381 | { | |
382 | if (vector < dev->msix_entries_nr && dev->msix_entry_used[vector]) | |
383 | --dev->msix_entry_used[vector]; | |
384 | } |