]>
Commit | Line | Data |
---|---|---|
de9768f0 DC |
1 | package PVE::QemuServer::PCI; |
2 | ||
41af2dfc TL |
3 | use warnings; |
4 | use strict; | |
5 | ||
74c17b7a | 6 | use PVE::JSONSchema; |
9b71c34d | 7 | use PVE::Mapping::PCI; |
74c17b7a | 8 | use PVE::SysFSTools; |
3bfee796 | 9 | use PVE::Tools; |
74c17b7a | 10 | |
de9768f0 DC |
11 | use base 'Exporter'; |
12 | ||
# Functions that other modules may import on request; nothing is exported by default.
our @EXPORT_OK = (
    'print_pci_addr',
    'print_pcie_addr',
    'print_pcie_root_port',
    'parse_hostpci',
);

# Number of 'hostpciN' config slots (hostpci0 .. hostpci15) this module iterates over.
our $MAX_HOSTPCI_DEVICES = 16;
21 | ||
# Matches a single host PCI address: an optional domain of four or more hex digits,
# then 'bus:device' and an optional '.function'. Only lowercase hex digits match.
my $PCIRE = qr/(?:[a-f0-9]{4,}:)?[a-f0-9]{2}:[a-f0-9]{2}(?:\.[a-f0-9])?/;

# Property-string schema of a 'hostpciN' config value. It is registered below as the
# 'pve-qm-hostpci' format and used by parse_hostpci() to parse such values.
my $hostpci_fmt = {
    host => {
        default_key => 1,
        optional => 1,
        type => 'string',
        pattern => qr/$PCIRE(;$PCIRE)*/,
        format_description => 'HOSTPCIID[;HOSTPCIID2...]',
        description => <<EODESCR,
Host PCI device pass through. The PCI ID of a host's PCI device or a list
of PCI virtual functions of the host. HOSTPCIID syntax is:

'bus:dev.func' (hexadecimal numbers)

You can use the 'lspci' command to list existing PCI devices.

Either this or the 'mapping' key must be set.
EODESCR
    },
    mapping => {
        optional => 1,
        type => 'string',
        format_description => 'mapping-id',
        format => 'pve-configid',
        description => "The ID of a cluster wide mapping. Either this or the default-key 'host'"
            ." must be set.",
    },
    rombar => {
        type => 'boolean',
        description => "Specify whether or not the device's ROM will be visible in the"
            ." guest's memory map.",
        optional => 1,
        default => 1,
    },
    romfile => {
        type => 'string',
        pattern => '[^,;]+',
        format_description => 'string',
        description => "Custom pci device rom filename (must be located in /usr/share/kvm/).",
        optional => 1,
    },
    pcie => {
        type => 'boolean',
        description => "Choose the PCI-express bus (needs the 'q35' machine model).",
        optional => 1,
        default => 0,
    },
    'x-vga' => {
        type => 'boolean',
        description => "Enable vfio-vga device support.",
        optional => 1,
        default => 0,
    },
    'legacy-igd' => {
        type => 'boolean',
        description => "Pass this device in legacy IGD mode, making it the primary and exclusive"
            ." graphics device in the VM. Requires 'pc-i440fx' machine type and VGA set to 'none'.",
        optional => 1,
        default => 0,
    },
    'mdev' => {
        type => 'string',
        format_description => 'string',
        pattern => '[^/\.:]+',
        optional => 1,
        description => <<EODESCR,
The type of mediated device to use.
An instance of this type will be created on startup of the VM and
will be cleaned up when the VM stops.
EODESCR
    },
    # the four ID overrides below share the same '0x' + four hex digits syntax
    'vendor-id' => {
        type => 'string',
        pattern => qr/^0x[0-9a-fA-F]{4}$/,
        format_description => 'hex id',
        optional => 1,
        description => "Override PCI vendor ID visible to guest",
    },
    'device-id' => {
        type => 'string',
        pattern => qr/^0x[0-9a-fA-F]{4}$/,
        format_description => 'hex id',
        optional => 1,
        description => "Override PCI device ID visible to guest",
    },
    'sub-vendor-id' => {
        type => 'string',
        pattern => qr/^0x[0-9a-fA-F]{4}$/,
        format_description => 'hex id',
        optional => 1,
        description => "Override PCI subsystem vendor ID visible to guest",
    },
    'sub-device-id' => {
        type => 'string',
        pattern => qr/^0x[0-9a-fA-F]{4}$/,
        format_description => 'hex id',
        optional => 1,
        description => "Override PCI subsystem device ID visible to guest",
    },
};
PVE::JSONSchema::register_format('pve-qm-hostpci', $hostpci_fmt);
123 | ||
# Standard option for one 'hostpciN' config entry: an optional property string in
# the 'pve-qm-hostpci' format. Registered under the name 'pve-qm-hostpci'.
our $hostpcidesc = {
    type => 'string',
    format => 'pve-qm-hostpci',
    optional => 1,
    description => "Map host PCI devices into guest.",
    verbose_description => <<EODESCR,
Map host PCI devices into guest.

NOTE: This option allows direct access to host hardware. So it is no longer
possible to migrate such machines - use with special care.

CAUTION: Experimental! User reported problems with this option.
EODESCR
};

PVE::JSONSchema::register_standard_option("pve-qm-hostpci", $hostpcidesc);
138 | ||
d7d698f6 TL |
# Cache for get_pci_addr_map(); stays undefined until the first call.
my $pci_addr_map;

# Returns the table that maps a device ID to its fixed { bus, addr } slot on the
# plain PCI layout. The table is built once and the same hash reference is handed
# out on every call. Entries sharing a slot name their counterpart in 'conflict_ok'.
sub get_pci_addr_map {
    return $pci_addr_map if defined($pci_addr_map);

    $pci_addr_map = {
        # bus 0
        piix3 => { bus => 0, addr => 1, conflict_ok => 'ehci' },
        ehci => { bus => 0, addr => 1, conflict_ok => 'piix3' }, # instead of piix3 on arm
        vga => { bus => 0, addr => 2, conflict_ok => 'legacy-igd' },
        'legacy-igd' => { bus => 0, addr => 2, conflict_ok => 'vga' }, # legacy-igd requires vga=none
        balloon0 => { bus => 0, addr => 3 },
        watchdog => { bus => 0, addr => 4 },
        scsihw0 => { bus => 0, addr => 5, conflict_ok => 'pci.3' },
        'pci.3' => { bus => 0, addr => 5, conflict_ok => 'scsihw0' }, # also used for virtio-scsi-single bridge
        scsihw1 => { bus => 0, addr => 6 },
        ahci0 => { bus => 0, addr => 7 },
        qga0 => { bus => 0, addr => 8 },
        spice => { bus => 0, addr => 9 },
        # virtio0..virtio5 => addr 10..15
        (map {; "virtio$_" => { bus => 0, addr => 10 + $_ } } 0 .. 5),
        hostpci0 => { bus => 0, addr => 16 },
        hostpci1 => { bus => 0, addr => 17 },
        # net0..net5 => addr 18..23
        (map {; "net$_" => { bus => 0, addr => 18 + $_ } } 0 .. 5),
        vga1 => { bus => 0, addr => 24 },
        vga2 => { bus => 0, addr => 25 },
        vga3 => { bus => 0, addr => 26 },
        hostpci2 => { bus => 0, addr => 27 },
        hostpci3 => { bus => 0, addr => 28 },
        #addr29 : usb-host (pve-usb.cfg)
        'pci.1' => { bus => 0, addr => 30 },
        'pci.2' => { bus => 0, addr => 31 },

        # bus 1
        # net6..net31 => addr 1..26
        (map {; "net$_" => { bus => 1, addr => $_ - 5 } } 6 .. 31),
        'xhci' => { bus => 1, addr => 27 },
        'pci.4' => { bus => 1, addr => 28 },
        'rng0' => { bus => 1, addr => 29 },
        'pci.2-igd' => { bus => 1, addr => 30 }, # replaces pci.2 in case a legacy IGD device is passed through

        # bus 2
        # virtio6..virtio15 => addr 1..10
        (map {; "virtio$_" => { bus => 2, addr => $_ - 5 } } 6 .. 15),
        'ivshmem' => { bus => 2, addr => 11 },
        'audio0' => { bus => 2, addr => 12 },
        # hostpci4..hostpci15 => addr 13..24
        (map {; "hostpci$_" => { bus => 2, addr => $_ + 9 } } 4 .. 15),

        # bus 3
        # virtioscsi0..virtioscsi30 => addr 1..31
        (map {; "virtioscsi$_" => { bus => 3, addr => $_ + 1 } } 0 .. 30),

        # bus 4
        # scsihw2..scsihw4 => addr 1..3
        (map {; "scsihw$_" => { bus => 4, addr => $_ - 1 } } 2 .. 4),
    };

    return $pci_addr_map;
}
267 | ||
# Builds the UUID string used for a VM's mediated device: the hostpci index is
# zero-padded into the first group (8 digits), the VMID into the last (12 digits).
sub generate_mdev_uuid {
    my ($vmid, $index) = @_;

    my $head = sprintf("%08d", $index);
    my $tail = sprintf("%012d", $vmid);

    return "${head}-0000-0000-0000-${tail}";
}
272 | ||
d7d698f6 TL |
# Looks up `$id` in the given address map and returns { bus, addr }, with the
# address formatted as a hex literal ("0x.."). Returns nothing if the ID is not in
# the map or its entry has no bus or no address.
my $get_addr_mapping_from_id = sub {
    my ($map, $id) = @_;

    my $entry = $map->{$id};
    return if !defined($entry);

    my ($bus, $addr) = @{$entry}{qw(bus addr)};
    return if !(defined($bus) && defined($addr));

    return { bus => $bus, addr => sprintf("0x%x", $addr) };
};
281 | ||
# Returns the ",bus=...,addr=..." suffix for the device `$id` on the plain PCI
# layout, or an empty string if the ID has no fixed slot. When a `$bridges` hash
# is passed, the bus number in use is recorded in it.
# Dies for IDE devices on aarch64 'virt' machines.
sub print_pci_addr {
    my ($id, $bridges, $arch, $machine) = @_;

    # using same bus slots on all HW, so we need to check special cases here:
    my $is_arm_virt = $arch eq 'aarch64' && $machine =~ /^virt/;
    die "aarch64/virt cannot use IDE devices\n" if $is_arm_virt && $id =~ /^ide/;
    my $busname = $is_arm_virt ? 'pcie' : 'pci';

    my $d = $get_addr_mapping_from_id->(get_pci_addr_map(), $id);
    return '' if !$d;

    $bridges->{$d->{bus}} = 1 if $bridges;

    return ",bus=$busname.$d->{bus},addr=$d->{addr}";
}
302 | ||
d7d698f6 TL |
# Cache for get_pcie_addr_map(); stays undefined until the first call.
my $pcie_addr_map;

# Returns the table that maps a device ID to its { bus, addr } slot on the PCIe
# layout. It is built once; later calls return the same hash reference.
sub get_pcie_addr_map {
    return $pcie_addr_map if defined($pcie_addr_map);

    my $map = {
        vga => { bus => 'pcie.0', addr => 1 },
        ivshmem => { bus => 'pcie.0', addr => 20 },
    };

    # hostpciN sits at address 0 of the bus named ich9-pcie-port-(N+1)
    for my $n (0 .. 15) {
        my $port = $n + 1;
        $map->{"hostpci$n"} = { bus => "ich9-pcie-port-$port", addr => 0 };
    }

    # win7 is picky about pcie assignments
    # slots on pcie.0, listed in hostpci index order (0..15)
    my @bus0_slots = (16 .. 19, 9 .. 15, 21 .. 25);
    for my $n (0 .. $#bus0_slots) {
        $map->{"hostpci${n}bus0"} = { bus => "pcie.0", addr => $bus0_slots[$n] };
    }

    $pcie_addr_map = $map;
    return $pcie_addr_map;
}
345 | ||
# Returns the ",bus=...,addr=..." suffix for the device `$id` on the PCIe layout,
# or an empty string if the ID has no entry in the PCIe address map.
sub print_pcie_addr {
    my ($id) = @_;

    my $d = $get_addr_mapping_from_id->(get_pcie_addr_map(), $id);

    return $d ? ",bus=$d->{bus},addr=$d->{addr}" : '';
}
b71351a7 | 358 | |
c4e16381 AL |
# Generates the device string for an additional pcie root port. The first 4 pcie
# root ports are defined in the pve-q35*.cfg files, so only the hostpci indices
# 4..15 yield a string; any other index returns ''.
sub print_pcie_root_port {
    my ($i) = @_;

    # 'slot.function' of each extra root port, keyed by hostpci index
    my %addr_of;
    @addr_of{4 .. 15} = (map("10.$_", 0 .. 7), map("11.$_", 0 .. 3));

    my $addr = $addr_of{$i};
    return '' if !defined($addr);

    my $id = $i + 1; # port IDs are one-based

    return join(',',
        "pcie-root-port",
        "id=ich9-pcie-port-${id}",
        "addr=${addr}",
        "x-speed=16,x-width=32,multifunction=on,bus=pcie.0",
        "port=${id},chassis=${id}",
    );
}
390 | ||
9b71c34d DC |
# Parses one 'hostpciN' property string. Returns nothing for an empty value.
# The 'host' and 'mapping' keys are removed from the result and resolved into a
# list of lists stored in the 'ids' property, like this:
#
# {
#     mdev => 1,
#     rombar => ...
#     ...
#     ids => [
#         # this contains a list of alternative devices,
#         [
#             # which are themselves lists of ids for one multifunction device
#             {
#                 id => "0000:00:00.0",
#                 vendor => "...",
#             },
#             {
#                 id => "0000:00:00.1",
#                 vendor => "...",
#             },
#         ],
#         [
#             ...
#         ],
#         ...
#     ],
# }
#
# 'has-multifunction' is set when an alternative resolves to more than one device.
# Dies if both or neither of 'host'/'mapping' are given, if a mapping cannot be
# found or validated, if an id resolves to no device, or if 'mdev' is combined
# with a multifunction device.
sub parse_hostpci {
    my ($value) = @_;

    return if !$value;

    my $res = PVE::JSONSchema::parse_property_string($hostpci_fmt, $value);

    my ($host, $mapping) = delete @{$res}{qw(host mapping)};

    die "Cannot set both 'host' and 'mapping'.\n" if defined($host) && defined($mapping);

    # each element is the list of ';'-separated ids of one alternative
    my @alternatives;
    if ($mapping) {
        # we have no ordinary pci id, must be a mapping
        my $devices = PVE::Mapping::PCI::find_on_current_node($mapping);
        die "PCI device mapping not found for '$mapping'\n" if !$devices || !scalar($devices->@*);

        for my $device ($devices->@*) {
            eval { PVE::Mapping::PCI::assert_valid($mapping, $device) };
            die "PCI device mapping invalid (hardware probably changed): $@\n" if $@;
            push @alternatives, [split(/;/, $device->{path})];
        }
    } elsif ($host) {
        push @alternatives, [split(/;/, $host)];
    } else {
        die "Either 'host' or 'mapping' must be set.\n";
    }

    $res->{ids} = [];
    for my $alternative (@alternatives) {
        my @functions;
        for my $id (@$alternative) {
            my $devs = PVE::SysFSTools::lspci($id);
            die "no PCI device found for '$id'\n" if !scalar($devs->@*);
            push @functions, @$devs;
        }

        if (scalar(@functions) > 1) {
            $res->{'has-multifunction'} = 1;
            die "cannot use mediated device with multifunction device\n" if $res->{mdev};
        }

        push $res->{ids}->@*, \@functions;
    }

    return $res;
}
463 | ||
9b71c34d DC |
# parses all hostpci devices from a config and does some sanity checks
# returns a hash like this:
# {
#     hostpci0 => {
#         # hash from parse_hostpci function
#     },
#     hostpci1 => { ... },
#     ...
# }
#
# Unset hostpciN entries are left out of the result. Dies if 'pcie' is requested
# on a non-q35 machine or if any of the legacy-igd constraints below is violated.
sub parse_hostpci_devices {
    my ($conf) = @_;

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    my $legacy_igd = 0;

    my $parsed_devices = {};
    for my $i (0 .. $MAX_HOSTPCI_DEVICES - 1) {
        my $id = "hostpci$i";
        my $d = parse_hostpci($conf->{$id});
        next if !$d;

        # check syntax
        die "q35 machine model is not enabled\n" if !$q35 && $d->{pcie};

        if ($d->{'legacy-igd'}) {
            die "only one device can be assigned in legacy-igd mode\n"
                if $legacy_igd;
            $legacy_igd = 1;

            die "legacy IGD assignment requires VGA mode to be 'none'\n"
                if !defined($conf->{'vga'}) || $conf->{'vga'} ne 'none';
            die "legacy IGD assignment requires rombar to be enabled\n"
                if defined($d->{rombar}) && !$d->{rombar};
            die "legacy IGD assignment is not compatible with x-vga\n"
                if $d->{'x-vga'};
            die "legacy IGD assignment is not compatible with mdev\n"
                if $d->{mdev};
            die "legacy IGD assignment is not compatible with q35\n"
                if $q35;
            die "legacy IGD assignment is not compatible with multifunction devices\n"
                if $d->{'has-multifunction'};
            die "legacy IGD assignment is not compatible with alternate devices\n"
                if scalar($d->{ids}->@*) > 1;
            # check first device for valid id; the pattern is anchored so that device
            # 02.0 on another bus (e.g. 03:02.0) is rejected, as the message promises
            die "legacy IGD assignment only works for devices on host bus 00:02.0\n"
                if $d->{ids}->[0]->[0]->{id} !~ m/^(?:[0-9a-f]+:)?00:02\.0$/;
        }

        $parsed_devices->{$id} = $d;
    }

    return $parsed_devices;
}
517 | ||
# takes the hash returned by parse_hostpci_devices and, for every non-mdev device,
# selects one of the given alternatives by trying to reserve it; the entry's 'ids'
# is replaced by the chosen alternative (a flat list of device hashes)
#
# mdev devices must be chosen later when we actually allocate it, but we
# flatten the inner list since there can only be one device per alternative anyway
#
# dies if a device id ends up assigned twice or if no alternative is free
my sub choose_hostpci_devices {
    my ($devices, $vmid) = @_;

    # PCI ids already handed out to an earlier hostpci entry
    my %taken;

    my $mark_taken = sub {
        my ($functions) = @_;
        for my $function (@$functions) {
            my $pci_id = $function->{id};
            die "device '$pci_id' assigned more than once\n" if $taken{$pci_id};
            $taken{$pci_id} = 1;
        }
    };

    for my $i (0 .. $MAX_HOSTPCI_DEVICES - 1) {
        my $device = $devices->{"hostpci$i"};
        next if !$device;

        my $alternatives = $device->{ids};

        if ($device->{mdev}) {
            $device->{ids} = [ map { $_->[0] } @$alternatives ];
            next;
        }

        if (scalar(@$alternatives) == 1) {
            # we only have one alternative, use that
            $device->{ids} = $alternatives->[0];
            $mark_taken->($device->{ids});
            next;
        }

        my $chosen;
        for my $alternative (@$alternatives) {
            my @pci_ids = map { $_->{id} } @$alternative;

            next if grep { defined($taken{$_}) } @pci_ids; # already used
            eval { reserve_pci_usage(\@pci_ids, $vmid, 10, undef) };
            next if $@;

            # found one that is not used or reserved
            $mark_taken->($alternative);
            $chosen = $alternative;
            last;
        }
        die "could not find a free device for 'hostpci$i'\n" if !$chosen;

        $device->{ids} = $chosen;
    }

    return $devices;
}
572 | ||
# Parses and selects the hostpci devices of `$conf` and appends one '-device'
# argument pair per passed-through function (plus extra pcie root ports where
# needed) to the `$devices` array.
#
# Returns the list ($kvm_off, $gpu_passthrough, $legacy_igd, $pci_devices):
# the first two are set when any device uses 'x-vga', the third when any device
# uses 'legacy-igd', and the last is the hash of chosen devices keyed by
# 'hostpciN'. As a side effect `$vga->{type}` is set to 'none' when 'x-vga' is
# used and the config has no 'vga' entry.
sub print_hostpci_devices {
    my ($vmid, $conf, $devices, $vga, $winversion, $bridges, $arch, $machine_type, $bootorder) = @_;

    my ($kvm_off, $gpu_passthrough, $legacy_igd) = (0, 0, 0);

    my $pci_devices = choose_hostpci_devices(parse_hostpci_devices($conf), $vmid);

    for my $i (0 .. $MAX_HOSTPCI_DEVICES - 1) {
        my $id = "hostpci$i";
        my $d = $pci_devices->{$id};
        next if !$d;

        $legacy_igd = 1 if $d->{'legacy-igd'};

        my $pciaddr;
        if (!$d->{pcie}) {
            my $pci_name = $d->{'legacy-igd'} ? 'legacy-igd' : $id;
            $pciaddr = print_pci_addr($pci_name, $bridges, $arch, $machine_type);
        } elsif ($winversion == 7) {
            # win7 wants to have the pcie devices directly on the pcie bus
            # instead of in the root port
            $pciaddr = print_pcie_addr("${id}bus0");
        } else {
            # add more root ports if needed, 4 are present by default
            # by pve-q35 cfgs, rest added here on demand.
            push @$devices, '-device', print_pcie_root_port($i) if $i > 3;
            $pciaddr = print_pcie_addr($id);
        }

        my $functions = $d->{ids};
        my $num_devices = scalar(@$functions);
        my $multifunction = $num_devices > 1 && !$d->{mdev};

        my $xvga = '';
        if ($d->{'x-vga'}) {
            $xvga = ',x-vga=on' if !($conf->{bios} && $conf->{bios} eq 'ovmf');
            $kvm_off = 1;
            $vga->{type} = 'none' if !defined($conf->{vga});
            $gpu_passthrough = 1;
        }

        # mediated devices are addressed through their sysfs node instead of a host id
        my $sysfspath = $d->{mdev}
            ? "/sys/bus/mdev/devices/" . generate_mdev_uuid($vmid, $i)
            : undef;

        for my $j (0 .. $num_devices - 1) {
            my $source = $sysfspath ? "sysfsdev=$sysfspath" : "host=$functions->[$j]->{id}";
            my $mf_addr = $multifunction ? ".$j" : '';

            my $devicestr = "vfio-pci,$source,id=${id}${mf_addr}${pciaddr}${mf_addr}";

            # per-device options are only emitted for the first function
            if ($j == 0) {
                $devicestr .= ',rombar=0' if defined($d->{rombar}) && !$d->{rombar};
                $devicestr .= "$xvga";
                $devicestr .= ",multifunction=on" if $multifunction;
                $devicestr .= ",romfile=/usr/share/kvm/$d->{romfile}" if $d->{romfile};
                $devicestr .= ",bootindex=$bootorder->{$id}" if $bootorder->{$id};
                for my $option (qw(vendor-id device-id sub-vendor-id sub-device-id)) {
                    $devicestr .= ",x-pci-$option=$d->{$option}" if $d->{$option};
                }
            }

            push @$devices, '-device', $devicestr;
            last if $d->{mdev}; # a mediated device yields exactly one '-device' entry
        }
    }

    return ($kvm_off, $gpu_passthrough, $legacy_igd, $pci_devices);
}
657 | ||
# Prepares the host PCI device `$pciid` for pass-through and returns its device
# info. With `$mdev` set, a mediated device of that type is created under the UUID
# derived from `$vmid` and `$index`; otherwise the device's group is bound to VFIO
# and the device is reset if its info reports 'has_fl_reset'.
# Dies if IOMMU support is missing, no device info is found, or binding/reset fails.
sub prepare_pci_device {
    my ($vmid, $pciid, $index, $mdev) = @_;

    my $info = PVE::SysFSTools::pci_device_info("$pciid");
    die "cannot prepare PCI pass-through, IOMMU not present\n" if !PVE::SysFSTools::check_iommu_support();
    die "no pci device info for device '$pciid'\n" if !$info;

    if ($mdev) {
        my $uuid = generate_mdev_uuid($vmid, $index);
        PVE::SysFSTools::pci_create_mdev_device($pciid, $uuid, $mdev);
        return $info;
    }

    PVE::SysFSTools::pci_dev_group_bind_to_vfio($pciid)
        or die "can't unbind/bind PCI group to VFIO '$pciid'\n";

    if ($info->{has_fl_reset}) {
        PVE::SysFSTools::pci_dev_reset($info)
            or die "can't reset PCI device '$pciid'\n";
    }

    return $info;
}
677 | ||
bda0ebff TL |
my $RUNDIR = '/run/qemu-server';
my $PCIID_RESERVATION_FILE = "${RUNDIR}/pci-id-reservations";
my $PCIID_RESERVATION_LOCK = "${PCIID_RESERVATION_FILE}.lock";

# a list of PCI ID to VMID reservations, the validity is protected against leakage by either a PID,
# for successfully started VM processes, or an expiration time for the initial time window between
# reservation and actual VM process start-up.
#
# Reads the reservation file and returns a hash keyed by PCI ID; each value holds the 'vmid' and
# either a 'pid' or a 'time' entry. Lines that do not match the expected format are skipped, and a
# file that cannot be opened yields an empty hash. The caller must hold the reservation lock.
my $parse_pci_reservation_unlocked = sub {
    my $pciids = {};

    # builtin open instead of IO::File, which this file never loads itself
    open(my $fh, '<', $PCIID_RESERVATION_FILE) or return $pciids;

    while (my $line = <$fh>) {
        next if $line !~ m/^($PCIRE)\s(\d+)\s(time|pid)\:(\d+)$/;
        $pciids->{$1} = {
            vmid => $2,
            "$3" => $4,
        };
    }
    close($fh);

    return $pciids;
};
699 | ||
# Writes the given reservations (PCI ID => { vmid, pid | time }) to the reservation
# file, one "<pci-id> <vmid> pid:<pid>" or "<pci-id> <vmid> time:<time>" line per
# entry, sorted by PCI ID. The caller must hold the reservation lock.
my $write_pci_reservation_unlocked = sub {
    my ($reservations) = @_;

    my @lines;
    for my $pci_id (sort keys %$reservations) {
        my $entry = $reservations->{$pci_id};
        # a PID takes precedence over an expiration time
        my $owner = defined($entry->{pid}) ? "pid:$entry->{pid}" : "time:$entry->{time}";
        push @lines, "$pci_id $entry->{vmid} $owner\n";
    }

    PVE::Tools::file_set_contents($PCIID_RESERVATION_FILE, join('', @lines));
};
714 | ||
# removes all PCI device reservations held by the `vmid`
#
# The reservation file is read, filtered and rewritten under the reservation lock
# (lock timeout 2). Any error raised while doing so is re-thrown.
sub remove_pci_reservation {
    my ($vmid) = @_;

    my $drop_vm_entries = sub {
        my $reservation_list = $parse_pci_reservation_unlocked->();

        my @owned = grep { $reservation_list->{$_}->{vmid} == $vmid } keys %$reservation_list;
        delete @{$reservation_list}{@owned};

        $write_pci_reservation_unlocked->($reservation_list);
    };

    PVE::Tools::lock_file($PCIID_RESERVATION_LOCK, 2, $drop_vm_entries);
    die $@ if $@;
}
730 | ||
# Reserves the given PCI IDs for `$vmid` in the reservation file (under the
# reservation lock, lock timeout 5).
#
# $requested_ids - a single PCI ID or an array reference of IDs; an empty list is a no-op
# $vmid          - the VM the devices get reserved for
# $timeout       - if set (and no `$pid`), reserve until now + timeout + 5 seconds
# $pid           - if set, reserve for the running VM process with this PID
#
# Dies if an ID is held by another VM through an unexpired time reservation or a
# still-running process, or if `$pid` is given while the same VM already runs with
# the reserved PID. A timeout request against an own, still-running PID reservation
# ends the whole operation without writing anything.
sub reserve_pci_usage {
    my ($requested_ids, $vmid, $timeout, $pid) = @_;

    $requested_ids = [ $requested_ids ] if !ref($requested_ids);
    return if !scalar(@$requested_ids); # do nothing for empty list

    my $reserve = sub {
        my $reservation_list = $parse_pci_reservation_unlocked->();

        my $ctime = time();
        for my $id (@$requested_ids) {
            my $reservation = $reservation_list->{$id};

            if ($reservation) {
                if ($reservation->{vmid} != $vmid) {
                    # check time based reservation
                    die "PCI device '$id' is currently reserved for use by VMID '$reservation->{vmid}'\n"
                        if defined($reservation->{time}) && $reservation->{time} > $ctime;

                    if (my $reserved_pid = $reservation->{pid}) {
                        # check running vm
                        my $running_pid = PVE::QemuServer::Helpers::vm_running_locally($reservation->{vmid});
                        die "PCI device '$id' already in use by VMID '$reservation->{vmid}'\n"
                            if defined($running_pid) && $running_pid == $reserved_pid;

                        warn "leftover PCI reservation found for $id, lets take it...\n";
                    }
                } elsif ($reservation->{time}) {
                    # already reserved by the same vmid
                    if (defined($timeout)) {
                        # use the longer timeout
                        my $old_timeout = $reservation->{time} - 5 - $ctime;
                        $timeout = $old_timeout if $old_timeout > $timeout;
                    }
                } elsif ($reservation->{pid}) {
                    # already reserved by the same vmid
                    my $running_pid = PVE::QemuServer::Helpers::vm_running_locally($reservation->{vmid});
                    if (defined($running_pid) && $running_pid == $reservation->{pid}) {
                        die "PCI device '$id' already in use by running VMID '$reservation->{vmid}'\n"
                            if defined($pid);

                        # ignore timeout reservation for running vms, can happen with e.g.
                        # qm showcmd
                        return if defined($timeout);
                    }
                }
            }

            my $entry = { vmid => $vmid };
            if (defined($pid)) { # VM started up, we can reserve now with the actual PID
                $entry->{pid} = $pid;
            } elsif (defined($timeout)) { # temporary reserve as we don't know the PID yet
                $entry->{time} = $ctime + $timeout + 5;
            }
            $reservation_list->{$id} = $entry;
        }

        $write_pci_reservation_unlocked->($reservation_list);
    };

    PVE::Tools::lock_file($PCIID_RESERVATION_LOCK, 5, $reserve);
    die $@ if $@;
}
790 | ||
b71351a7 | 791 | 1; |