]> git.proxmox.com Git - qemu-server.git/blob - PVE/QemuServer/PCI.pm
schema: fix description of migrate_downtime parameter
[qemu-server.git] / PVE / QemuServer / PCI.pm
1 package PVE::QemuServer::PCI;
2
3 use warnings;
4 use strict;
5
6 use PVE::JSONSchema;
7 use PVE::Mapping::PCI;
8 use PVE::SysFSTools;
9 use PVE::Tools;
10
11 use base 'Exporter';
12
13 our @EXPORT_OK = qw(
14 print_pci_addr
15 print_pcie_addr
16 print_pcie_root_port
17 parse_hostpci
18 );
19
# Number of 'hostpciN' slots (hostpci0 .. hostpci15) iterated by the helpers in this module.
our $MAX_HOSTPCI_DEVICES = 16;

# A single PCI address: optional domain (4 or more hex digits), bus, device and
# an optional '.function' suffix. Only lower-case hex digits are accepted.
my $PCIRE = qr/(?:[a-f0-9]{4,}:)?[a-f0-9]{2}:[a-f0-9]{2}(?:\.[a-f0-9])?/;

# Property-string schema of a 'hostpciN' config value, registered below as the
# 'pve-qm-hostpci' format and used by parse_hostpci().
my $hostpci_fmt = {
    host => {
        default_key => 1,
        optional => 1,
        type => 'string',
        pattern => qr/$PCIRE(;$PCIRE)*/,
        format_description => 'HOSTPCIID[;HOSTPCIID2...]',
        description => <<EODESCR,
Host PCI device pass through. The PCI ID of a host's PCI device or a list
of PCI virtual functions of the host. HOSTPCIID syntax is:

'bus:dev.func' (hexadecimal numbers)

You can use the 'lspci' command to list existing PCI devices.

Either this or the 'mapping' key must be set.
EODESCR
    },
    mapping => {
        optional => 1,
        type => 'string',
        format_description => 'mapping-id',
        format => 'pve-configid',
        description => "The ID of a cluster wide mapping. Either this or the default-key 'host'"
            ." must be set.",
    },
    rombar => {
        type => 'boolean',
        description => "Specify whether or not the device's ROM will be visible in the"
            ." guest's memory map.",
        optional => 1,
        default => 1,
    },
    romfile => {
        type => 'string',
        pattern => '[^,;]+',
        format_description => 'string',
        description => "Custom pci device rom filename (must be located in /usr/share/kvm/).",
        optional => 1,
    },
    pcie => {
        type => 'boolean',
        description => "Choose the PCI-express bus (needs the 'q35' machine model).",
        optional => 1,
        default => 0,
    },
    'x-vga' => {
        type => 'boolean',
        description => "Enable vfio-vga device support.",
        optional => 1,
        default => 0,
    },
    'legacy-igd' => {
        type => 'boolean',
        description => "Pass this device in legacy IGD mode, making it the primary and exclusive"
            ." graphics device in the VM. Requires 'pc-i440fx' machine type and VGA set to 'none'.",
        optional => 1,
        default => 0,
    },
    'mdev' => {
        type => 'string',
        format_description => 'string',
        pattern => '[^/\.:]+',
        optional => 1,
        description => <<EODESCR
The type of mediated device to use.
An instance of this type will be created on startup of the VM and
will be cleaned up when the VM stops.
EODESCR
    },
    # The four overrides below share the same shape: a '0x'-prefixed, four digit hex id.
    'vendor-id' => {
        type => 'string',
        pattern => qr/^0x[0-9a-fA-F]{4}$/,
        format_description => 'hex id',
        optional => 1,
        description => "Override PCI vendor ID visible to guest"
    },
    'device-id' => {
        type => 'string',
        pattern => qr/^0x[0-9a-fA-F]{4}$/,
        format_description => 'hex id',
        optional => 1,
        description => "Override PCI device ID visible to guest"
    },
    'sub-vendor-id' => {
        type => 'string',
        pattern => qr/^0x[0-9a-fA-F]{4}$/,
        format_description => 'hex id',
        optional => 1,
        description => "Override PCI subsystem vendor ID visible to guest"
    },
    'sub-device-id' => {
        type => 'string',
        pattern => qr/^0x[0-9a-fA-F]{4}$/,
        format_description => 'hex id',
        optional => 1,
        description => "Override PCI subsystem device ID visible to guest"
    }
};
PVE::JSONSchema::register_format('pve-qm-hostpci', $hostpci_fmt);
123
# Standard option describing a 'hostpciN' config entry; the value itself is a
# property string in the 'pve-qm-hostpci' format.
our $hostpcidesc = {
    type => 'string',
    format => 'pve-qm-hostpci',
    optional => 1,
    description => "Map host PCI devices into guest.",
    verbose_description => <<EODESCR,
Map host PCI devices into guest.

NOTE: This option allows direct access to host hardware. So it is no longer
possible to migrate such machines - use with special care.

CAUTION: Experimental! User reported problems with this option.
EODESCR
};

PVE::JSONSchema::register_standard_option("pve-qm-hostpci", $hostpcidesc);
138
my $pci_addr_map;

# Returns the (lazily built, then cached) table mapping a device id to its
# fixed { bus => N, addr => M } location on the plain PCI buses.
#
# Entries sharing a slot with another id carry a 'conflict_ok' key that names
# that other id.
sub get_pci_addr_map {
    return $pci_addr_map if defined($pci_addr_map);

    # one-off entries that do not belong to a consecutive run
    my $map = {
        piix3 => { bus => 0, addr => 1, conflict_ok => 'ehci' },
        ehci => { bus => 0, addr => 1, conflict_ok => 'piix3' }, # instead of piix3 on arm
        vga => { bus => 0, addr => 2, conflict_ok => 'legacy-igd' },
        'legacy-igd' => { bus => 0, addr => 2, conflict_ok => 'vga' }, # legacy-igd requires vga=none
        balloon0 => { bus => 0, addr => 3 },
        watchdog => { bus => 0, addr => 4 },
        scsihw0 => { bus => 0, addr => 5, conflict_ok => 'pci.3' },
        'pci.3' => { bus => 0, addr => 5, conflict_ok => 'scsihw0' }, # also used for virtio-scsi-single bridge
        scsihw1 => { bus => 0, addr => 6 },
        ahci0 => { bus => 0, addr => 7 },
        qga0 => { bus => 0, addr => 8 },
        spice => { bus => 0, addr => 9 },
        #addr29 : usb-host (pve-usb.cfg)
        'pci.1' => { bus => 0, addr => 30 },
        'pci.2' => { bus => 0, addr => 31 },
        xhci => { bus => 1, addr => 27 },
        'pci.4' => { bus => 1, addr => 28 },
        rng0 => { bus => 1, addr => 29 },
        'pci.2-igd' => { bus => 1, addr => 30 }, # replaces pci.2 in case a legacy IGD device is passed through
        ivshmem => { bus => 2, addr => 11 },
        audio0 => { bus => 2, addr => 12 },
    };

    # consecutive runs: [ id prefix, first index, last index, bus, addr of first index ]
    my @runs = (
        [ 'virtio', 0, 5, 0, 10 ],
        [ 'hostpci', 0, 1, 0, 16 ],
        [ 'net', 0, 5, 0, 18 ],
        [ 'vga', 1, 3, 0, 24 ],
        [ 'hostpci', 2, 3, 0, 27 ],
        [ 'net', 6, 31, 1, 1 ],
        [ 'virtio', 6, 15, 2, 1 ],
        [ 'hostpci', 4, 15, 2, 13 ],
        [ 'virtioscsi', 0, 30, 3, 1 ],
        [ 'scsihw', 2, 4, 4, 1 ],
    );

    for my $run (@runs) {
        my ($prefix, $first, $last, $bus, $first_addr) = @$run;
        for my $index ($first .. $last) {
            $map->{"${prefix}${index}"} = { bus => $bus, addr => $first_addr + $index - $first };
        }
    }

    $pci_addr_map = $map;
    return $pci_addr_map;
}
267
# Builds the UUID-shaped name of a mediated device from the hostpci index
# (first group, zero padded to 8 digits) and the VMID (last group, zero padded
# to 12 digits); the three middle groups are always '0000'.
sub generate_mdev_uuid {
    my ($vmid, $index) = @_;

    my $head = sprintf("%08d", $index);
    my $tail = sprintf("%012d", $vmid);

    return join('-', $head, '0000', '0000', '0000', $tail);
}
272
# Looks up $id in an address map and returns { bus => ..., addr => '0x..' }
# with the address rendered as lower-case hex. Returns nothing if the id is
# unknown or its entry lacks a bus or an addr.
my $get_addr_mapping_from_id = sub {
    my ($map, $id) = @_;

    my $entry = $map->{$id};
    return if !defined($entry);

    for my $required (qw(bus addr)) {
        return if !defined($entry->{$required});
    }

    my $hex_addr = sprintf("0x%x", $entry->{addr});
    return { bus => $entry->{bus}, addr => $hex_addr };
};
281
# Returns the ',bus=...,addr=...' device string suffix for $id according to
# get_pci_addr_map(), or an empty string if $id has no fixed address.
#
# If $bridges is given, the bus number of the resolved address is recorded in
# it as a key. Dies for 'ide*' ids on aarch64 'virt*' machines.
sub print_pci_addr {
    my ($id, $bridges, $arch, $machine) = @_;

    # using same bus slots on all HW, so we need to check special cases here:
    my $busname = 'pci';
    if ($arch eq 'aarch64' && $machine =~ /^virt/) {
        die "aarch64/virt cannot use IDE devices\n" if $id =~ /^ide/;
        $busname = 'pcie';
    }

    my $location = $get_addr_mapping_from_id->(get_pci_addr_map(), $id);
    return '' if !$location;

    my ($bus, $addr) = ($location->{bus}, $location->{addr});
    $bridges->{$bus} = 1 if $bridges;

    return ",bus=$busname.$bus,addr=$addr";
}
302
my $pcie_addr_map;

# Returns the (lazily built, then cached) table mapping a device id to its
# { bus => NAME, addr => N } location for PCIe placement.
#
# Every hostpciN has two entries: 'hostpciN' behind its own root port
# 'ich9-pcie-port-(N+1)', and 'hostpciNbus0' directly on 'pcie.0'.
sub get_pcie_addr_map {
    return $pcie_addr_map if defined($pcie_addr_map);

    my $map = {
        vga => { bus => 'pcie.0', addr => 1 },
        ivshmem => { bus => 'pcie.0', addr => 20 },
    };

    # win7 is picky about pcie assignments
    # slot on pcie.0 for hostpci0bus0 .. hostpci15bus0, indexed by hostpci number
    my @bus0_addr = (16, 17, 18, 19, 9, 10, 11, 12, 13, 14, 15, 21, 22, 23, 24, 25);

    for my $n (0 .. $#bus0_addr) {
        $map->{"hostpci${n}"} = { bus => "ich9-pcie-port-" . ($n + 1), addr => 0 };
        $map->{"hostpci${n}bus0"} = { bus => "pcie.0", addr => $bus0_addr[$n] };
    }

    $pcie_addr_map = $map;
    return $pcie_addr_map;
}
345
# Returns the ',bus=...,addr=...' device string suffix for $id according to
# get_pcie_addr_map(), or an empty string if $id has no entry there.
sub print_pcie_addr {
    my ($id) = @_;

    my $res = '';

    # the map getter takes no parameters, the id is only needed for the lookup
    my $map = get_pcie_addr_map();
    if (my $d = $get_addr_mapping_from_id->($map, $id)) {
        $res = ",bus=$d->{bus},addr=$d->{addr}";
    }

    return $res;
}
358
# Generates the device string for an additional pcie root port. The first 4
# pcie root ports are defined in the pve-q35*.cfg files, so only indices 4..15
# yield a string; any other index returns ''.
#
# Index $i maps to port/chassis/id number $i + 1. Indices 4..11 use functions
# 0..7 of slot 10, indices 12..15 use functions 0..3 of slot 11.
sub print_pcie_root_port {
    my ($i) = @_;

    my %addr_of_index;
    for my $index (4 .. 15) {
        my $offset = $index - 4;
        $addr_of_index{$index} = sprintf("%d.%d", 10 + int($offset / 8), $offset % 8);
    }

    my $addr = $addr_of_index{$i};
    return '' if !defined($addr);

    my $port = $i + 1;
    return join(',',
        "pcie-root-port",
        "id=ich9-pcie-port-${port}",
        "addr=${addr}",
        "x-speed=16",
        "x-width=32",
        "multifunction=on",
        "bus=pcie.0",
        "port=${port}",
        "chassis=${port}",
    );
}
390
# Parses a 'hostpciN' property string. The 'host' and 'mapping' keys are
# removed from the result and resolved into a list of lists stored under the
# 'ids' property, like this:
#
# {
#     mdev => 1,
#     rombar => ...
#     ...
#     ids => [
#         # this contains a list of alternative devices,
#         [
#             # which are themselves lists of ids for one multifunction device
#             {
#                 id => "0000:00:00.0",
#                 vendor => "...",
#             },
#             {
#                 id => "0000:00:00.1",
#                 vendor => "...",
#             },
#         ],
#         [
#             ...
#         ],
#         ...
#     ],
# }
#
# 'has-multifunction' is set if any alternative resolves to more than one
# device. Returns nothing for an empty/false $value. Dies if neither or both of
# 'host' and 'mapping' are set, if a mapping or id cannot be resolved, or if
# 'mdev' is combined with a multifunction device.
sub parse_hostpci {
    my ($value) = @_;

    return if !$value;

    my $res = PVE::JSONSchema::parse_property_string($hostpci_fmt, $value);

    my ($host, $mapping) = delete $res->@{qw(host mapping)};

    die "Cannot set both 'host' and 'mapping'.\n" if defined($host) && defined($mapping);

    # each alternative is a list of the ';'-separated ids of one configured path
    my @alternatives;
    if ($mapping) {
        # we have no ordinary pci id, must be a mapping
        my $devices = PVE::Mapping::PCI::find_on_current_node($mapping);
        die "PCI device mapping not found for '$mapping'\n" if !$devices || !scalar($devices->@*);

        for my $device ($devices->@*) {
            eval { PVE::Mapping::PCI::assert_valid($mapping, $device) };
            die "PCI device mapping invalid (hardware probably changed): $@\n" if $@;
            push @alternatives, [split(/;/, $device->{path})];
        }
    } elsif ($host) {
        push @alternatives, [split(/;/, $host)];
    } else {
        die "Either 'host' or 'mapping' must be set.\n";
    }

    $res->{ids} = [];
    for my $alternative (@alternatives) {
        my @resolved;
        for my $pci_id ($alternative->@*) {
            my $found = PVE::SysFSTools::lspci($pci_id);
            die "no PCI device found for '$pci_id'\n" if !scalar($found->@*);
            push @resolved, $found->@*;
        }

        if (scalar(@resolved) > 1) {
            $res->{'has-multifunction'} = 1;
            die "cannot use mediated device with multifunction device\n" if $res->{mdev};
        }

        push $res->{ids}->@*, \@resolved;
    }

    return $res;
}
463
# Parses every hostpciN entry of a VM config and sanity-checks it.
# Returns a hash like this:
# {
#     hostpci0 => {
#         # hash from parse_hostpci function
#     },
#     hostpci1 => { ... },
#     ...
# }
#
# Unset entries are skipped. Dies if 'pcie' is requested without a q35 machine
# or if one of the legacy-igd constraints checked below is violated.
sub parse_hostpci_devices {
    my ($conf) = @_;

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    my $legacy_igd = 0;

    my $parsed_devices = {};
    for my $i (0 .. $MAX_HOSTPCI_DEVICES - 1) {
        my $id = "hostpci$i";
        my $d = parse_hostpci($conf->{$id});
        next if !$d;

        # check syntax
        die "q35 machine model is not enabled" if !$q35 && $d->{pcie};

        if ($d->{'legacy-igd'}) {
            die "only one device can be assigned in legacy-igd mode\n"
                if $legacy_igd;
            $legacy_igd = 1;

            # evaluated in order, the first violated constraint is reported
            my @constraints = (
                [
                    sub { !defined($conf->{'vga'}) || $conf->{'vga'} ne 'none' },
                    "legacy IGD assignment requires VGA mode to be 'none'\n",
                ],
                [
                    sub { defined($d->{rombar}) && !$d->{rombar} },
                    "legacy IGD assignment requires rombar to be enabled\n",
                ],
                [
                    sub { $d->{'x-vga'} },
                    "legacy IGD assignment is not compatible with x-vga\n",
                ],
                [
                    sub { $d->{mdev} },
                    "legacy IGD assignment is not compatible with mdev\n",
                ],
                [
                    sub { $q35 },
                    "legacy IGD assignment is not compatible with q35\n",
                ],
                [
                    sub { $d->{'has-multifunction'} },
                    "legacy IGD assignment is not compatible with multifunction devices\n",
                ],
                [
                    sub { scalar($d->{ids}->@*) > 1 },
                    "legacy IGD assignment is not compatible with alternate devices\n",
                ],
                [
                    # check first device for valid id
                    sub { $d->{ids}->[0]->[0]->{id} !~ m/02\.0$/ },
                    "legacy IGD assignment only works for devices on host bus 00:02.0\n",
                ],
            );

            for my $constraint (@constraints) {
                my ($is_violated, $message) = @$constraint;
                die $message if $is_violated->();
            }
        }

        $parsed_devices->{$id} = $d;
    }

    return $parsed_devices;
}
517
# takes the hash returned by parse_hostpci_devices and for all non mdev gpus,
# selects one of the given alternatives by trying to reserve it
#
# mdev devices must be chosen later when we actually allocate it, but we
# flatten the inner list since there can only be one device per alternative anyway
#
# On return, each device's 'ids' is a flat list of device hashes instead of a
# list of alternatives. Dies if a device id would be assigned twice or if no
# alternative of an entry could be reserved. Returns the modified $devices hash.
my sub choose_hostpci_devices {
    my ($devices, $vmid) = @_;

    # PCI ids already handed out to an earlier hostpciN entry of this VM
    my $used = {};

    my $add_used_device = sub {
        my ($devices) = @_;
        for my $used_device ($devices->@*) {
            my $used_id = $used_device->{id};
            die "device '$used_id' assigned more than once\n" if $used->{$used_id};
            $used->{$used_id} = 1;
        }
    };

    for (my $i = 0; $i < $MAX_HOSTPCI_DEVICES; $i++) {
        my $device = $devices->{"hostpci$i"};
        next if !$device;

        if ($device->{mdev}) {
            # keep only the first device of every alternative
            $device->{ids} = [ map { $_->[0] } $device->{ids}->@* ];
            next;
        }

        # count the alternatives first, then compare (the comparison used to
        # sit inside the scalar() call)
        if (scalar($device->{ids}->@*) == 1) {
            # we only have one alternative, use that
            $device->{ids} = $device->{ids}->[0];
            $add_used_device->($device->{ids});
            next;
        }

        my $found = 0;
        for my $alternative ($device->{ids}->@*) {
            my $ids = [map { $_->{id} } @$alternative];

            next if grep { defined($used->{$_}) } @$ids; # already used
            # a failed reservation just means this alternative is taken, try the next
            eval { reserve_pci_usage($ids, $vmid, 10, undef) };
            next if $@;

            # found one that is not used or reserved
            $add_used_device->($alternative);
            $device->{ids} = $alternative;
            $found = 1;
            last;
        }
        die "could not find a free device for 'hostpci$i'\n" if !$found;
    }

    return $devices;
}
572
# Appends the '-device vfio-pci,...' arguments for all configured hostpciN
# entries of $conf to the $devices list (plus extra pcie root ports for pcie
# entries with index > 3 on non-win7 guests).
#
# Side effects on arguments: pushes onto $devices, may set $vga->{type} to
# 'none' (x-vga entry while no 'vga' is configured), and $bridges is passed on
# to print_pci_addr.
#
# Returns the list ($kvm_off, $gpu_passthrough, $legacy_igd, $pci_devices),
# where $pci_devices is the parsed and chosen device hash.
sub print_hostpci_devices {
    my ($vmid, $conf, $devices, $vga, $winversion, $bridges, $arch, $machine_type, $bootorder) = @_;

    my ($kvm_off, $gpu_passthrough, $legacy_igd) = (0, 0, 0);

    my $pci_devices = choose_hostpci_devices(parse_hostpci_devices($conf), $vmid);

    for my $i (0 .. $MAX_HOSTPCI_DEVICES - 1) {
        my $id = "hostpci$i";
        my $d = $pci_devices->{$id};
        next if !$d;

        $legacy_igd = 1 if $d->{'legacy-igd'};

        my $pciaddr;
        if (!$d->{pcie}) {
            my $pci_name = $d->{'legacy-igd'} ? 'legacy-igd' : $id;
            $pciaddr = print_pci_addr($pci_name, $bridges, $arch, $machine_type);
        } elsif ($winversion == 7) {
            # win7 wants to have the pcie devices directly on the pcie bus
            # instead of in the root port
            $pciaddr = print_pcie_addr("${id}bus0");
        } else {
            # add more root ports if needed, 4 are present by default
            # by pve-q35 cfgs, rest added here on demand.
            push @$devices, '-device', print_pcie_root_port($i) if $i > 3;
            $pciaddr = print_pcie_addr($id);
        }

        my $function_count = scalar($d->{ids}->@*);
        my $multifunction = $function_count > 1 && !$d->{mdev};

        my $xvga = '';
        if ($d->{'x-vga'}) {
            $xvga = ',x-vga=on' if !$conf->{bios} || $conf->{bios} ne 'ovmf';
            $kvm_off = 1;
            $vga->{type} = 'none' if !defined($conf->{vga});
            $gpu_passthrough = 1;
        }

        # mdev entries are addressed through their sysfs node instead of a host id
        my $sysfspath;
        $sysfspath = "/sys/bus/mdev/devices/" . generate_mdev_uuid($vmid, $i) if $d->{mdev};

        for my $j (0 .. $function_count - 1) {
            my $function = $d->{ids}->[$j];

            my $devicestr = "vfio-pci";
            $devicestr .= $sysfspath ? ",sysfsdev=$sysfspath" : ",host=$function->{id}";

            my $mf_addr = $multifunction ? ".$j" : '';
            $devicestr .= ",id=${id}${mf_addr}${pciaddr}${mf_addr}";

            # the per-entry options only go onto the first function
            if ($j == 0) {
                $devicestr .= ',rombar=0' if defined($d->{rombar}) && !$d->{rombar};
                $devicestr .= "$xvga";
                $devicestr .= ",multifunction=on" if $multifunction;
                $devicestr .= ",romfile=/usr/share/kvm/$d->{romfile}" if $d->{romfile};
                $devicestr .= ",bootindex=$bootorder->{$id}" if $bootorder->{$id};
                for my $option (qw(vendor-id device-id sub-vendor-id sub-device-id)) {
                    $devicestr .= ",x-pci-$option=$d->{$option}" if $d->{$option};
                }
            }

            push @$devices, '-device', $devicestr;

            # a mediated device gets exactly one '-device' argument
            last if $d->{mdev};
        }
    }

    return ($kvm_off, $gpu_passthrough, $legacy_igd, $pci_devices);
}
657
# Prepares the host PCI device $pciid for pass-through to VM $vmid.
#
# With $mdev set, a mediated device of that type is created under the UUID
# derived from ($vmid, $index). Otherwise the device's group is bound to VFIO
# and, if the device info has 'has_fl_reset' set, the device is reset.
#
# Returns the device info hash. Dies if there is no IOMMU support, no device
# info, or if binding/resetting fails.
sub prepare_pci_device {
    my ($vmid, $pciid, $index, $mdev) = @_;

    my $info = PVE::SysFSTools::pci_device_info("$pciid");

    PVE::SysFSTools::check_iommu_support()
        or die "cannot prepare PCI pass-through, IOMMU not present\n";
    $info or die "no pci device info for device '$pciid'\n";

    if ($mdev) {
        PVE::SysFSTools::pci_create_mdev_device($pciid, generate_mdev_uuid($vmid, $index), $mdev);
        return $info;
    }

    PVE::SysFSTools::pci_dev_group_bind_to_vfio($pciid)
        or die "can't unbind/bind PCI group to VFIO '$pciid'\n";

    if ($info->{has_fl_reset}) {
        PVE::SysFSTools::pci_dev_reset($info)
            or die "can't reset PCI device '$pciid'\n";
    }

    return $info;
}
677
# Location of the PCI id reservation list and of the lock file guarding it.
my $RUNDIR = '/run/qemu-server';
my $PCIID_RESERVATION_FILE = $RUNDIR . '/pci-id-reservations';
my $PCIID_RESERVATION_LOCK = $PCIID_RESERVATION_FILE . '.lock';
681
# a list of PCI ID to VMID reservations, the validity is protected against leakage by either a PID,
# for successfully started VM processes, or an expiration time for the initial time window between
# reservation and actual VM process start-up.
#
# Reads the reservation file and returns a hash of
#   PCI ID => { vmid => VMID, pid => PID }   or   PCI ID => { vmid => VMID, time => TIMESTAMP }
# Lines that do not match the expected 'PCIID VMID (time|pid):NUMBER' layout are ignored, and a
# file that cannot be opened yields an empty hash. Does no locking itself (hence "unlocked").
my $parse_pci_reservation_unlocked = sub {
    my $pciids = {};

    # core open() instead of IO::File, which this module never loads itself
    open(my $fh, '<', $PCIID_RESERVATION_FILE) or return $pciids;

    while (my $line = <$fh>) {
        if ($line =~ m/^($PCIRE)\s(\d+)\s(time|pid)\:(\d+)$/) {
            $pciids->{$1} = {
                vmid => $2,
                "$3" => $4,
            };
        }
    }
    close($fh);

    return $pciids;
};
699
# Serializes the reservation hash (PCI ID => { vmid, pid | time }) into the
# reservation file, one 'PCIID VMID pid:PID' or 'PCIID VMID time:TIMESTAMP'
# line per id, sorted by PCI ID. An entry with a defined pid is written as a
# pid reservation, anything else as a time reservation. Does no locking itself.
my $write_pci_reservation_unlocked = sub {
    my ($reservations) = @_;

    my @lines;
    for my $pci_id (sort keys %$reservations) {
        my $entry = $reservations->{$pci_id};
        my $validity = defined($entry->{pid}) ? "pid:$entry->{pid}" : "time:$entry->{time}";
        push @lines, "$pci_id $entry->{vmid} $validity\n";
    }

    PVE::Tools::file_set_contents($PCIID_RESERVATION_FILE, join('', @lines));
};
714
# removes all PCI device reservations held by the `vmid`
#
# The reservation file is re-read, filtered and written back while holding
# the reservation lock (2 second lock timeout). Dies with the error left in
# $@ by the locked section, if any.
sub remove_pci_reservation {
    my ($vmid) = @_;

    my $drop_reservations_of_vm = sub {
        my $reservation_list = $parse_pci_reservation_unlocked->();

        my @owned_ids = grep { $reservation_list->{$_}->{vmid} == $vmid } keys %$reservation_list;
        delete $reservation_list->@{@owned_ids};

        $write_pci_reservation_unlocked->($reservation_list);
    };

    PVE::Tools::lock_file($PCIID_RESERVATION_LOCK, 2, $drop_reservations_of_vm);
    die $@ if $@;
}
730
# Reserves the given PCI id(s) for $vmid in the reservation file, under the
# reservation lock (5 second lock timeout).
#
# $requested_ids - a single id or an array ref of ids; an empty list is a no-op
# $vmid          - the VM the devices get reserved for
# $timeout       - if defined (and $pid is not), reserve until now + $timeout + 5
# $pid           - if defined, reserve for that PID (takes precedence over $timeout)
#
# Dies if an id is held by another VM through an unexpired time reservation or
# through the PID of its currently running process, or if $pid is given while
# the same VM is already running with the reserved PID.
sub reserve_pci_usage {
    my ($requested_ids, $vmid, $timeout, $pid) = @_;

    $requested_ids = [ $requested_ids ] if !ref($requested_ids);
    return if !scalar(@$requested_ids); # do nothing for empty list

    my $reserve_locked = sub {
        my $reservation_list = $parse_pci_reservation_unlocked->();

        my $ctime = time();
        for my $id (@$requested_ids) {
            if (my $existing = $reservation_list->{$id}) {
                my $owner = $existing->{vmid};

                if ($owner != $vmid) {
                    # check time based reservation
                    die "PCI device '$id' is currently reserved for use by VMID '$owner'\n"
                        if defined($existing->{time}) && $existing->{time} > $ctime;

                    if (my $reserved_pid = $existing->{pid}) {
                        # check running vm
                        my $running_pid = PVE::QemuServer::Helpers::vm_running_locally($owner);
                        die "PCI device '$id' already in use by VMID '$owner'\n"
                            if defined($running_pid) && $running_pid == $reserved_pid;

                        warn "leftover PCI reservation found for $id, lets take it...\n";
                    }
                } elsif ($existing->{time}) {
                    # already reserved by the same vmid, use the longer timeout
                    if (defined($timeout)) {
                        my $remaining = $existing->{time} - 5 - $ctime;
                        $timeout = $remaining if $remaining > $timeout;
                    }
                } elsif (my $reserved_pid = $existing->{pid}) {
                    # already reserved by the same vmid for a (possibly still running) process
                    my $running_pid = PVE::QemuServer::Helpers::vm_running_locally($owner);
                    if (defined($running_pid) && $running_pid == $reserved_pid) {
                        die "PCI device '$id' already in use by running VMID '$owner'\n"
                            if defined($pid);

                        # ignore timeout reservation for running vms, can happen with e.g.
                        # qm showcmd
                        return if defined($timeout);
                    }
                }
            }

            my $entry = { vmid => $vmid };
            if (defined($pid)) { # VM started up, we can reserve now with the actual PID
                $entry->{pid} = $pid;
            } elsif (defined($timeout)) { # temporary reserve as we don't know the PID yet
                $entry->{time} = $ctime + $timeout + 5;
            }
            $reservation_list->{$id} = $entry;
        }

        $write_pci_reservation_unlocked->($reservation_list);
    };

    PVE::Tools::lock_file($PCIID_RESERVATION_LOCK, 5, $reserve_locked);
    die $@ if $@;
}
790
791 1;