]>
Commit | Line | Data |
---|---|---|
7c0fa8df KO |
1 | /* |
2 | * pcie_sriov.c: | |
3 | * | |
4 | * Implementation of SR/IOV emulation support. | |
5 | * | |
6 | * Copyright (c) 2015-2017 Knut Omang <knut.omang@oracle.com> | |
7 | * | |
8 | * This work is licensed under the terms of the GNU GPL, version 2 or later. | |
9 | * See the COPYING file in the top-level directory. | |
10 | * | |
11 | */ | |
12 | ||
13 | #include "qemu/osdep.h" | |
14 | #include "hw/pci/pci.h" | |
15 | #include "hw/pci/pcie.h" | |
16 | #include "hw/pci/pci_bus.h" | |
17 | #include "hw/qdev-properties.h" | |
18 | #include "qemu/error-report.h" | |
19 | #include "qemu/range.h" | |
20 | #include "qapi/error.h" | |
21 | #include "trace.h" | |
22 | ||
23 | static PCIDevice *register_vf(PCIDevice *pf, int devfn, | |
24 | const char *name, uint16_t vf_num); | |
25 | static void unregister_vfs(PCIDevice *dev); | |
26 | ||
27 | void pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, | |
28 | const char *vfname, uint16_t vf_dev_id, | |
29 | uint16_t init_vfs, uint16_t total_vfs, | |
30 | uint16_t vf_offset, uint16_t vf_stride) | |
31 | { | |
32 | uint8_t *cfg = dev->config + offset; | |
33 | uint8_t *wmask; | |
34 | ||
35 | pcie_add_capability(dev, PCI_EXT_CAP_ID_SRIOV, 1, | |
36 | offset, PCI_EXT_CAP_SRIOV_SIZEOF); | |
37 | dev->exp.sriov_cap = offset; | |
38 | dev->exp.sriov_pf.num_vfs = 0; | |
39 | dev->exp.sriov_pf.vfname = g_strdup(vfname); | |
40 | dev->exp.sriov_pf.vf = NULL; | |
41 | ||
42 | pci_set_word(cfg + PCI_SRIOV_VF_OFFSET, vf_offset); | |
43 | pci_set_word(cfg + PCI_SRIOV_VF_STRIDE, vf_stride); | |
44 | ||
45 | /* | |
46 | * Mandatory page sizes to support. | |
47 | * Device implementations can call pcie_sriov_pf_add_sup_pgsize() | |
48 | * to set more bits: | |
49 | */ | |
50 | pci_set_word(cfg + PCI_SRIOV_SUP_PGSIZE, SRIOV_SUP_PGSIZE_MINREQ); | |
51 | ||
52 | /* | |
53 | * Default is to use 4K pages, software can modify it | |
54 | * to any of the supported bits | |
55 | */ | |
56 | pci_set_word(cfg + PCI_SRIOV_SYS_PGSIZE, 0x1); | |
57 | ||
58 | /* Set up device ID and initial/total number of VFs available */ | |
59 | pci_set_word(cfg + PCI_SRIOV_VF_DID, vf_dev_id); | |
60 | pci_set_word(cfg + PCI_SRIOV_INITIAL_VF, init_vfs); | |
61 | pci_set_word(cfg + PCI_SRIOV_TOTAL_VF, total_vfs); | |
62 | pci_set_word(cfg + PCI_SRIOV_NUM_VF, 0); | |
63 | ||
64 | /* Write enable control bits */ | |
65 | wmask = dev->wmask + offset; | |
66 | pci_set_word(wmask + PCI_SRIOV_CTRL, | |
67 | PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE | PCI_SRIOV_CTRL_ARI); | |
68 | pci_set_word(wmask + PCI_SRIOV_NUM_VF, 0xffff); | |
69 | pci_set_word(wmask + PCI_SRIOV_SYS_PGSIZE, 0x553); | |
70 | ||
71 | qdev_prop_set_bit(&dev->qdev, "multifunction", true); | |
72 | } | |
73 | ||
74 | void pcie_sriov_pf_exit(PCIDevice *dev) | |
75 | { | |
76 | unregister_vfs(dev); | |
77 | g_free((char *)dev->exp.sriov_pf.vfname); | |
78 | dev->exp.sriov_pf.vfname = NULL; | |
79 | } | |
80 | ||
81 | void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num, | |
82 | uint8_t type, dma_addr_t size) | |
83 | { | |
84 | uint32_t addr; | |
85 | uint64_t wmask; | |
86 | uint16_t sriov_cap = dev->exp.sriov_cap; | |
87 | ||
88 | assert(sriov_cap > 0); | |
89 | assert(region_num >= 0); | |
90 | assert(region_num < PCI_NUM_REGIONS); | |
91 | assert(region_num != PCI_ROM_SLOT); | |
92 | ||
93 | wmask = ~(size - 1); | |
94 | addr = sriov_cap + PCI_SRIOV_BAR + region_num * 4; | |
95 | ||
96 | pci_set_long(dev->config + addr, type); | |
97 | if (!(type & PCI_BASE_ADDRESS_SPACE_IO) && | |
98 | type & PCI_BASE_ADDRESS_MEM_TYPE_64) { | |
99 | pci_set_quad(dev->wmask + addr, wmask); | |
100 | pci_set_quad(dev->cmask + addr, ~0ULL); | |
101 | } else { | |
102 | pci_set_long(dev->wmask + addr, wmask & 0xffffffff); | |
103 | pci_set_long(dev->cmask + addr, 0xffffffff); | |
104 | } | |
105 | dev->exp.sriov_pf.vf_bar_type[region_num] = type; | |
106 | } | |
107 | ||
108 | void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num, | |
109 | MemoryRegion *memory) | |
110 | { | |
111 | PCIIORegion *r; | |
112 | PCIBus *bus = pci_get_bus(dev); | |
113 | uint8_t type; | |
114 | pcibus_t size = memory_region_size(memory); | |
115 | ||
116 | assert(pci_is_vf(dev)); /* PFs must use pci_register_bar */ | |
117 | assert(region_num >= 0); | |
118 | assert(region_num < PCI_NUM_REGIONS); | |
119 | type = dev->exp.sriov_vf.pf->exp.sriov_pf.vf_bar_type[region_num]; | |
120 | ||
121 | if (!is_power_of_2(size)) { | |
122 | error_report("%s: PCI region size must be a power" | |
123 | " of two - type=0x%x, size=0x%"FMT_PCIBUS, | |
124 | __func__, type, size); | |
125 | exit(1); | |
126 | } | |
127 | ||
128 | r = &dev->io_regions[region_num]; | |
129 | r->memory = memory; | |
130 | r->address_space = | |
131 | type & PCI_BASE_ADDRESS_SPACE_IO | |
132 | ? bus->address_space_io | |
133 | : bus->address_space_mem; | |
134 | r->size = size; | |
135 | r->type = type; | |
136 | ||
137 | r->addr = pci_bar_address(dev, region_num, r->type, r->size); | |
138 | if (r->addr != PCI_BAR_UNMAPPED) { | |
139 | memory_region_add_subregion_overlap(r->address_space, | |
140 | r->addr, r->memory, 1); | |
141 | } | |
142 | } | |
143 | ||
144 | static PCIDevice *register_vf(PCIDevice *pf, int devfn, const char *name, | |
145 | uint16_t vf_num) | |
146 | { | |
147 | PCIDevice *dev = pci_new(devfn, name); | |
148 | dev->exp.sriov_vf.pf = pf; | |
149 | dev->exp.sriov_vf.vf_number = vf_num; | |
150 | PCIBus *bus = pci_get_bus(pf); | |
151 | Error *local_err = NULL; | |
152 | ||
153 | qdev_realize(&dev->qdev, &bus->qbus, &local_err); | |
154 | if (local_err) { | |
155 | error_report_err(local_err); | |
156 | return NULL; | |
157 | } | |
158 | ||
159 | /* set vid/did according to sr/iov spec - they are not used */ | |
160 | pci_config_set_vendor_id(dev->config, 0xffff); | |
161 | pci_config_set_device_id(dev->config, 0xffff); | |
162 | ||
163 | return dev; | |
164 | } | |
165 | ||
166 | static void register_vfs(PCIDevice *dev) | |
167 | { | |
168 | uint16_t num_vfs; | |
169 | uint16_t i; | |
170 | uint16_t sriov_cap = dev->exp.sriov_cap; | |
171 | uint16_t vf_offset = | |
172 | pci_get_word(dev->config + sriov_cap + PCI_SRIOV_VF_OFFSET); | |
173 | uint16_t vf_stride = | |
174 | pci_get_word(dev->config + sriov_cap + PCI_SRIOV_VF_STRIDE); | |
175 | int32_t devfn = dev->devfn + vf_offset; | |
176 | ||
177 | assert(sriov_cap > 0); | |
178 | num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF); | |
179 | ||
b21e2380 | 180 | dev->exp.sriov_pf.vf = g_new(PCIDevice *, num_vfs); |
7c0fa8df KO |
181 | assert(dev->exp.sriov_pf.vf); |
182 | ||
183 | trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn), | |
184 | PCI_FUNC(dev->devfn), num_vfs); | |
185 | for (i = 0; i < num_vfs; i++) { | |
186 | dev->exp.sriov_pf.vf[i] = register_vf(dev, devfn, | |
187 | dev->exp.sriov_pf.vfname, i); | |
188 | if (!dev->exp.sriov_pf.vf[i]) { | |
189 | num_vfs = i; | |
190 | break; | |
191 | } | |
192 | devfn += vf_stride; | |
193 | } | |
194 | dev->exp.sriov_pf.num_vfs = num_vfs; | |
195 | } | |
196 | ||
197 | static void unregister_vfs(PCIDevice *dev) | |
198 | { | |
199 | Error *local_err = NULL; | |
200 | uint16_t num_vfs = dev->exp.sriov_pf.num_vfs; | |
201 | uint16_t i; | |
202 | ||
203 | trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn), | |
204 | PCI_FUNC(dev->devfn), num_vfs); | |
205 | for (i = 0; i < num_vfs; i++) { | |
206 | PCIDevice *vf = dev->exp.sriov_pf.vf[i]; | |
207 | object_property_set_bool(OBJECT(vf), "realized", false, &local_err); | |
208 | if (local_err) { | |
209 | fprintf(stderr, "Failed to unplug: %s\n", | |
210 | error_get_pretty(local_err)); | |
211 | error_free(local_err); | |
212 | } | |
213 | object_unparent(OBJECT(vf)); | |
214 | } | |
215 | g_free(dev->exp.sriov_pf.vf); | |
216 | dev->exp.sriov_pf.vf = NULL; | |
217 | dev->exp.sriov_pf.num_vfs = 0; | |
218 | pci_set_word(dev->config + dev->exp.sriov_cap + PCI_SRIOV_NUM_VF, 0); | |
219 | } | |
220 | ||
221 | void pcie_sriov_config_write(PCIDevice *dev, uint32_t address, | |
222 | uint32_t val, int len) | |
223 | { | |
224 | uint32_t off; | |
225 | uint16_t sriov_cap = dev->exp.sriov_cap; | |
226 | ||
227 | if (!sriov_cap || address < sriov_cap) { | |
228 | return; | |
229 | } | |
230 | off = address - sriov_cap; | |
231 | if (off >= PCI_EXT_CAP_SRIOV_SIZEOF) { | |
232 | return; | |
233 | } | |
234 | ||
235 | trace_sriov_config_write(dev->name, PCI_SLOT(dev->devfn), | |
236 | PCI_FUNC(dev->devfn), off, val, len); | |
237 | ||
238 | if (range_covers_byte(off, len, PCI_SRIOV_CTRL)) { | |
239 | if (dev->exp.sriov_pf.num_vfs) { | |
240 | if (!(val & PCI_SRIOV_CTRL_VFE)) { | |
241 | unregister_vfs(dev); | |
242 | } | |
243 | } else { | |
244 | if (val & PCI_SRIOV_CTRL_VFE) { | |
245 | register_vfs(dev); | |
246 | } | |
247 | } | |
248 | } | |
249 | } | |
250 | ||
251 | ||
252 | /* Reset SR/IOV VF Enable bit to trigger an unregister of all VFs */ | |
253 | void pcie_sriov_pf_disable_vfs(PCIDevice *dev) | |
254 | { | |
255 | uint16_t sriov_cap = dev->exp.sriov_cap; | |
256 | if (sriov_cap) { | |
257 | uint32_t val = pci_get_byte(dev->config + sriov_cap + PCI_SRIOV_CTRL); | |
258 | if (val & PCI_SRIOV_CTRL_VFE) { | |
259 | val &= ~PCI_SRIOV_CTRL_VFE; | |
260 | pcie_sriov_config_write(dev, sriov_cap + PCI_SRIOV_CTRL, val, 1); | |
261 | } | |
262 | } | |
263 | } | |
264 | ||
265 | /* Add optional supported page sizes to the mask of supported page sizes */ | |
266 | void pcie_sriov_pf_add_sup_pgsize(PCIDevice *dev, uint16_t opt_sup_pgsize) | |
267 | { | |
268 | uint8_t *cfg = dev->config + dev->exp.sriov_cap; | |
269 | uint8_t *wmask = dev->wmask + dev->exp.sriov_cap; | |
270 | ||
271 | uint16_t sup_pgsize = pci_get_word(cfg + PCI_SRIOV_SUP_PGSIZE); | |
272 | ||
273 | sup_pgsize |= opt_sup_pgsize; | |
274 | ||
275 | /* | |
276 | * Make sure the new bits are set, and that system page size | |
277 | * also can be set to any of the new values according to spec: | |
278 | */ | |
279 | pci_set_word(cfg + PCI_SRIOV_SUP_PGSIZE, sup_pgsize); | |
280 | pci_set_word(wmask + PCI_SRIOV_SYS_PGSIZE, sup_pgsize); | |
281 | } | |
282 | ||
283 | ||
284 | uint16_t pcie_sriov_vf_number(PCIDevice *dev) | |
285 | { | |
286 | assert(pci_is_vf(dev)); | |
287 | return dev->exp.sriov_vf.vf_number; | |
288 | } | |
289 | ||
7c0fa8df KO |
290 | PCIDevice *pcie_sriov_get_pf(PCIDevice *dev) |
291 | { | |
292 | return dev->exp.sriov_vf.pf; | |
293 | } | |
69387f49 ŁG |
294 | |
295 | PCIDevice *pcie_sriov_get_vf_at_index(PCIDevice *dev, int n) | |
296 | { | |
297 | assert(!pci_is_vf(dev)); | |
298 | if (n < dev->exp.sriov_pf.num_vfs) { | |
299 | return dev->exp.sriov_pf.vf[n]; | |
300 | } | |
301 | return NULL; | |
302 | } |