1 package PVE
::QemuServer
::PCI
;
20 our $MAX_HOSTPCI_DEVICES = 16;
22 my $PCIRE = qr/(?:[a-f0-9]{4,}:)?[a-f0-9]{2}:[a-f0-9]{2}(?:\.[a-f0-9])?/;
28 pattern
=> qr/$PCIRE(;$PCIRE)*/,
29 format_description
=> 'HOSTPCIID[;HOSTPCIID2...]',
30 description
=> <<EODESCR,
31 Host PCI device pass through. The PCI ID of a host's PCI device or a list
32 of PCI virtual functions of the host. HOSTPCIID syntax is:
34 'bus:dev.func' (hexadecimal numbers)
36 You can use the 'lspci' command to list existing PCI devices.
38 Either this or the 'mapping' key must be set.
44 format_description
=> 'mapping-id',
45 format
=> 'pve-configid',
46 description
=> "The ID of a cluster wide mapping. Either this or the default-key 'host'"
51 description
=> "Specify whether or not the device's ROM will be visible in the"
52 ." guest's memory map.",
59 format_description
=> 'string',
60 description
=> "Custom pci device rom filename (must be located in /usr/share/kvm/).",
65 description
=> "Choose the PCI-express bus (needs the 'q35' machine model).",
71 description
=> "Enable vfio-vga device support.",
77 description
=> "Pass this device in legacy IGD mode, making it the primary and exclusive"
78 ." graphics device in the VM. Requires 'pc-i440fx' machine type and VGA set to 'none'.",
84 format_description
=> 'string',
85 pattern
=> '[^/\.:]+',
87 description
=> <<EODESCR
88 The type of mediated device to use.
89 An instance of this type will be created on startup of the VM and
90 will be cleaned up when the VM stops.
95 pattern
=> qr/^0x[0-9a-fA-F]{4}$/,
96 format_description
=> 'hex id',
98 description
=> "Override PCI vendor ID visible to guest"
102 pattern
=> qr/^0x[0-9a-fA-F]{4}$/,
103 format_description
=> 'hex id',
105 description
=> "Override PCI device ID visible to guest"
109 pattern
=> qr/^0x[0-9a-fA-F]{4}$/,
110 format_description
=> 'hex id',
112 description
=> "Override PCI subsystem vendor ID visible to guest"
116 pattern
=> qr/^0x[0-9a-fA-F]{4}$/,
117 format_description
=> 'hex id',
119 description
=> "Override PCI subsystem device ID visible to guest"
122 PVE
::JSONSchema
::register_format
('pve-qm-hostpci', $hostpci_fmt);
126 type
=> 'string', format
=> 'pve-qm-hostpci',
127 description
=> "Map host PCI devices into guest.",
128 verbose_description
=> <<EODESCR,
129 Map host PCI devices into guest.
131 NOTE: This option allows direct access to host hardware. So it is no longer
132 possible to migrate such machines - use with special care.
134 CAUTION: Experimental! User reported problems with this option.
137 PVE
::JSONSchema
::register_standard_option
("pve-qm-hostpci", $hostpcidesc);
# Returns the PCI address map, a hash keyed by device id (e.g. 'virtio0',
# 'net6', 'hostpci4') whose values are { bus => N, addr => N } hashes.
# The table is only assigned while $pci_addr_map is still undefined, so it is
# built once and the same reference is returned on every later call.
# Some entries carry an extra 'conflict_ok' key naming another id that is
# listed with the very same bus/addr pair (piix3/ehci, vga/legacy-igd,
# scsihw0/pci.3).
# NOTE(review): how 'conflict_ok' is consumed is not visible here - presumably
# a consistency check elsewhere tolerates exactly these overlaps; confirm.
140 sub get_pci_addr_map
{
142 piix3
=> { bus
=> 0, addr
=> 1, conflict_ok
=> qw(ehci) },
143 ehci
=> { bus
=> 0, addr
=> 1, conflict_ok
=> qw(piix3) }, # instead of piix3 on arm
144 vga
=> { bus
=> 0, addr
=> 2, conflict_ok
=> qw(legacy-igd) },
145 'legacy-igd' => { bus
=> 0, addr
=> 2, conflict_ok
=> qw(vga) }, # legacy-igd requires vga=none
146 balloon0
=> { bus
=> 0, addr
=> 3 },
147 watchdog
=> { bus
=> 0, addr
=> 4 },
148 scsihw0
=> { bus
=> 0, addr
=> 5, conflict_ok
=> qw(pci.3) },
149 'pci.3' => { bus
=> 0, addr
=> 5, conflict_ok
=> qw(scsihw0) }, # also used for virtio-scsi-single bridge
150 scsihw1
=> { bus
=> 0, addr
=> 6 },
151 ahci0
=> { bus
=> 0, addr
=> 7 },
152 qga0
=> { bus
=> 0, addr
=> 8 },
153 spice
=> { bus
=> 0, addr
=> 9 },
154 virtio0
=> { bus
=> 0, addr
=> 10 },
155 virtio1
=> { bus
=> 0, addr
=> 11 },
156 virtio2
=> { bus
=> 0, addr
=> 12 },
157 virtio3
=> { bus
=> 0, addr
=> 13 },
158 virtio4
=> { bus
=> 0, addr
=> 14 },
159 virtio5
=> { bus
=> 0, addr
=> 15 },
160 hostpci0
=> { bus
=> 0, addr
=> 16 },
161 hostpci1
=> { bus
=> 0, addr
=> 17 },
162 net0
=> { bus
=> 0, addr
=> 18 },
163 net1
=> { bus
=> 0, addr
=> 19 },
164 net2
=> { bus
=> 0, addr
=> 20 },
165 net3
=> { bus
=> 0, addr
=> 21 },
166 net4
=> { bus
=> 0, addr
=> 22 },
167 net5
=> { bus
=> 0, addr
=> 23 },
168 vga1
=> { bus
=> 0, addr
=> 24 },
169 vga2
=> { bus
=> 0, addr
=> 25 },
170 vga3
=> { bus
=> 0, addr
=> 26 },
171 hostpci2
=> { bus
=> 0, addr
=> 27 },
172 hostpci3
=> { bus
=> 0, addr
=> 28 },
173 #addr29 : usb-host (pve-usb.cfg)
174 'pci.1' => { bus
=> 0, addr
=> 30 },
175 'pci.2' => { bus
=> 0, addr
=> 31 },
176 'net6' => { bus
=> 1, addr
=> 1 },
177 'net7' => { bus
=> 1, addr
=> 2 },
178 'net8' => { bus
=> 1, addr
=> 3 },
179 'net9' => { bus
=> 1, addr
=> 4 },
180 'net10' => { bus
=> 1, addr
=> 5 },
181 'net11' => { bus
=> 1, addr
=> 6 },
182 'net12' => { bus
=> 1, addr
=> 7 },
183 'net13' => { bus
=> 1, addr
=> 8 },
184 'net14' => { bus
=> 1, addr
=> 9 },
185 'net15' => { bus
=> 1, addr
=> 10 },
186 'net16' => { bus
=> 1, addr
=> 11 },
187 'net17' => { bus
=> 1, addr
=> 12 },
188 'net18' => { bus
=> 1, addr
=> 13 },
189 'net19' => { bus
=> 1, addr
=> 14 },
190 'net20' => { bus
=> 1, addr
=> 15 },
191 'net21' => { bus
=> 1, addr
=> 16 },
192 'net22' => { bus
=> 1, addr
=> 17 },
193 'net23' => { bus
=> 1, addr
=> 18 },
194 'net24' => { bus
=> 1, addr
=> 19 },
195 'net25' => { bus
=> 1, addr
=> 20 },
196 'net26' => { bus
=> 1, addr
=> 21 },
197 'net27' => { bus
=> 1, addr
=> 22 },
198 'net28' => { bus
=> 1, addr
=> 23 },
199 'net29' => { bus
=> 1, addr
=> 24 },
200 'net30' => { bus
=> 1, addr
=> 25 },
201 'net31' => { bus
=> 1, addr
=> 26 },
202 'xhci' => { bus
=> 1, addr
=> 27 },
203 'pci.4' => { bus
=> 1, addr
=> 28 },
204 'rng0' => { bus
=> 1, addr
=> 29 },
205 'pci.2-igd' => { bus
=> 1, addr
=> 30 }, # replaces pci.2 in case a legacy IGD device is passed through
206 'virtio6' => { bus
=> 2, addr
=> 1 },
207 'virtio7' => { bus
=> 2, addr
=> 2 },
208 'virtio8' => { bus
=> 2, addr
=> 3 },
209 'virtio9' => { bus
=> 2, addr
=> 4 },
210 'virtio10' => { bus
=> 2, addr
=> 5 },
211 'virtio11' => { bus
=> 2, addr
=> 6 },
212 'virtio12' => { bus
=> 2, addr
=> 7 },
213 'virtio13' => { bus
=> 2, addr
=> 8 },
214 'virtio14' => { bus
=> 2, addr
=> 9 },
215 'virtio15' => { bus
=> 2, addr
=> 10 },
216 'ivshmem' => { bus
=> 2, addr
=> 11 },
217 'audio0' => { bus
=> 2, addr
=> 12 },
218 hostpci4
=> { bus
=> 2, addr
=> 13 },
219 hostpci5
=> { bus
=> 2, addr
=> 14 },
220 hostpci6
=> { bus
=> 2, addr
=> 15 },
221 hostpci7
=> { bus
=> 2, addr
=> 16 },
222 hostpci8
=> { bus
=> 2, addr
=> 17 },
223 hostpci9
=> { bus
=> 2, addr
=> 18 },
224 hostpci10
=> { bus
=> 2, addr
=> 19 },
225 hostpci11
=> { bus
=> 2, addr
=> 20 },
226 hostpci12
=> { bus
=> 2, addr
=> 21 },
227 hostpci13
=> { bus
=> 2, addr
=> 22 },
228 hostpci14
=> { bus
=> 2, addr
=> 23 },
229 hostpci15
=> { bus
=> 2, addr
=> 24 },
230 'virtioscsi0' => { bus
=> 3, addr
=> 1 },
231 'virtioscsi1' => { bus
=> 3, addr
=> 2 },
232 'virtioscsi2' => { bus
=> 3, addr
=> 3 },
233 'virtioscsi3' => { bus
=> 3, addr
=> 4 },
234 'virtioscsi4' => { bus
=> 3, addr
=> 5 },
235 'virtioscsi5' => { bus
=> 3, addr
=> 6 },
236 'virtioscsi6' => { bus
=> 3, addr
=> 7 },
237 'virtioscsi7' => { bus
=> 3, addr
=> 8 },
238 'virtioscsi8' => { bus
=> 3, addr
=> 9 },
239 'virtioscsi9' => { bus
=> 3, addr
=> 10 },
240 'virtioscsi10' => { bus
=> 3, addr
=> 11 },
241 'virtioscsi11' => { bus
=> 3, addr
=> 12 },
242 'virtioscsi12' => { bus
=> 3, addr
=> 13 },
243 'virtioscsi13' => { bus
=> 3, addr
=> 14 },
244 'virtioscsi14' => { bus
=> 3, addr
=> 15 },
245 'virtioscsi15' => { bus
=> 3, addr
=> 16 },
246 'virtioscsi16' => { bus
=> 3, addr
=> 17 },
247 'virtioscsi17' => { bus
=> 3, addr
=> 18 },
248 'virtioscsi18' => { bus
=> 3, addr
=> 19 },
249 'virtioscsi19' => { bus
=> 3, addr
=> 20 },
250 'virtioscsi20' => { bus
=> 3, addr
=> 21 },
251 'virtioscsi21' => { bus
=> 3, addr
=> 22 },
252 'virtioscsi22' => { bus
=> 3, addr
=> 23 },
253 'virtioscsi23' => { bus
=> 3, addr
=> 24 },
254 'virtioscsi24' => { bus
=> 3, addr
=> 25 },
255 'virtioscsi25' => { bus
=> 3, addr
=> 26 },
256 'virtioscsi26' => { bus
=> 3, addr
=> 27 },
257 'virtioscsi27' => { bus
=> 3, addr
=> 28 },
258 'virtioscsi28' => { bus
=> 3, addr
=> 29 },
259 'virtioscsi29' => { bus
=> 3, addr
=> 30 },
260 'virtioscsi30' => { bus
=> 3, addr
=> 31 },
261 'scsihw2' => { bus
=> 4, addr
=> 1 },
262 'scsihw3' => { bus
=> 4, addr
=> 2 },
263 'scsihw4' => { bus
=> 4, addr
=> 3 },
264 } if !defined($pci_addr_map);
265 return $pci_addr_map;
# Builds the UUID string used for the mediated device of one hostpci slot.
# The first group is $index zero-padded to 8 decimal digits, the last group is
# $vmid zero-padded to 12 decimal digits and the three middle groups are fixed
# to '0000', so the same ($vmid, $index) pair always yields the same UUID.
268 sub generate_mdev_uuid
{
269 my ($vmid, $index) = @_;
270 return sprintf("%08d-0000-0000-0000-%012d", $index, $vmid);
# Private helper, invoked as $get_addr_mapping_from_id->($map, $id) by the
# address printers in this file.
# Returns nothing (empty list / undef) when no entry $d is found or when the
# entry lacks a defined 'bus' or 'addr'. Otherwise returns a new hash with the
# bus copied unchanged and the addr rendered as a hexadecimal string ("0x..").
# NOTE(review): how $d is derived from the arguments is not visible here -
# presumably $d = $map->{$id}; confirm.
273 my $get_addr_mapping_from_id = sub {
277 return if !defined($d) || !defined($d->{bus
}) || !defined($d->{addr
});
279 return { bus
=> $d->{bus
}, addr
=> sprintf("0x%x", $d->{addr
}) };
283 my ($id, $bridges, $arch, $machine) = @_;
287 # using same bus slots on all HW, so we need to check special cases here:
289 if ($arch eq 'aarch64' && $machine =~ /^virt/) {
290 die "aarch64/virt cannot use IDE devices\n" if $id =~ /^ide
/;
294 my $map = get_pci_addr_map
();
295 if (my $d = $get_addr_mapping_from_id->($map, $id)) {
296 $res = ",bus=$busname.$d->{bus},addr=$d->{addr}";
297 $bridges->{$d->{bus
}} = 1 if $bridges;
# Returns the PCIe address map, a hash keyed by device id whose values are
# { bus => NAME, addr => N } hashes. Unlike the plain PCI map the bus is a
# bus/port name string ('pcie.0' or 'ich9-pcie-port-N'), not a number.
# Each hostpciN id has two entries: 'hostpciN' sits at addr 0 behind root port
# N+1, while 'hostpciNbus0' sits directly on 'pcie.0' (see the win7 remark).
# The table is only assigned while $pcie_addr_map is still undefined, so it is
# built once and the same reference is returned on every later call.
304 sub get_pcie_addr_map
{
306 vga
=> { bus
=> 'pcie.0', addr
=> 1 },
307 hostpci0
=> { bus
=> "ich9-pcie-port-1", addr
=> 0 },
308 hostpci1
=> { bus
=> "ich9-pcie-port-2", addr
=> 0 },
309 hostpci2
=> { bus
=> "ich9-pcie-port-3", addr
=> 0 },
310 hostpci3
=> { bus
=> "ich9-pcie-port-4", addr
=> 0 },
311 hostpci4
=> { bus
=> "ich9-pcie-port-5", addr
=> 0 },
312 hostpci5
=> { bus
=> "ich9-pcie-port-6", addr
=> 0 },
313 hostpci6
=> { bus
=> "ich9-pcie-port-7", addr
=> 0 },
314 hostpci7
=> { bus
=> "ich9-pcie-port-8", addr
=> 0 },
315 hostpci8
=> { bus
=> "ich9-pcie-port-9", addr
=> 0 },
316 hostpci9
=> { bus
=> "ich9-pcie-port-10", addr
=> 0 },
317 hostpci10
=> { bus
=> "ich9-pcie-port-11", addr
=> 0 },
318 hostpci11
=> { bus
=> "ich9-pcie-port-12", addr
=> 0 },
319 hostpci12
=> { bus
=> "ich9-pcie-port-13", addr
=> 0 },
320 hostpci13
=> { bus
=> "ich9-pcie-port-14", addr
=> 0 },
321 hostpci14
=> { bus
=> "ich9-pcie-port-15", addr
=> 0 },
322 hostpci15
=> { bus
=> "ich9-pcie-port-16", addr
=> 0 },
323 # win7 is picky about pcie assignments
324 hostpci0bus0
=> { bus
=> "pcie.0", addr
=> 16 },
325 hostpci1bus0
=> { bus
=> "pcie.0", addr
=> 17 },
326 hostpci2bus0
=> { bus
=> "pcie.0", addr
=> 18 },
327 hostpci3bus0
=> { bus
=> "pcie.0", addr
=> 19 },
328 ivshmem
=> { bus
=> 'pcie.0', addr
=> 20 },
329 hostpci4bus0
=> { bus
=> "pcie.0", addr
=> 9 },
330 hostpci5bus0
=> { bus
=> "pcie.0", addr
=> 10 },
331 hostpci6bus0
=> { bus
=> "pcie.0", addr
=> 11 },
332 hostpci7bus0
=> { bus
=> "pcie.0", addr
=> 12 },
333 hostpci8bus0
=> { bus
=> "pcie.0", addr
=> 13 },
334 hostpci9bus0
=> { bus
=> "pcie.0", addr
=> 14 },
335 hostpci10bus0
=> { bus
=> "pcie.0", addr
=> 15 },
336 hostpci11bus0
=> { bus
=> "pcie.0", addr
=> 21 },
337 hostpci12bus0
=> { bus
=> "pcie.0", addr
=> 22 },
338 hostpci13bus0
=> { bus
=> "pcie.0", addr
=> 23 },
339 hostpci14bus0
=> { bus
=> "pcie.0", addr
=> 24 },
340 hostpci15bus0
=> { bus
=> "pcie.0", addr
=> 25 },
341 } if !defined($pcie_addr_map);
343 return $pcie_addr_map;
# Looks up $id in the PCIe address map and, when a usable mapping exists, sets
# $res to the ",bus=<name>,addr=<hex>" fragment that callers append to a QEMU
# '-device' string (addr is already hex-formatted by the mapping helper).
# NOTE(review): $id is passed to get_pcie_addr_map(), but the visible part of
# that sub never reads its arguments - the argument looks superfluous; confirm.
346 sub print_pcie_addr
{
351 my $map = get_pcie_addr_map
($id);
352 if (my $d = $get_addr_mapping_from_id->($map, $id)) {
353 $res = ",bus=$d->{bus},addr=$d->{addr}";
359 # Generates the device strings for additional pcie root ports. The first 4 pcie
360 # root ports are defined in the pve-q35*.cfg files.
# The device string is only built when $root_port_addresses has an entry for
# $i; it describes a 'pcie-root-port' named "ich9-pcie-port-<id>" on bus
# 'pcie.0', using <id> for both the port and the chassis number.
# NOTE(review): the contents of $root_port_addresses and the derivation of $id
# from $i are not visible here - the port naming in get_pcie_addr_map suggests
# $id = $i + 1; confirm.
361 sub print_pcie_root_port
{
365 my $root_port_addresses = {
380 if (defined($root_port_addresses->{$i})) {
382 $res = "pcie-root-port,id=ich9-pcie-port-${id}";
383 $res .= ",addr=$root_port_addresses->{$i}";
384 $res .= ",x-speed=16,x-width=32,multifunction=on,bus=pcie.0";
385 $res .= ",port=${id},chassis=${id}";
391 # returns the parsed pci config but parses the 'host' part into
392 # a list of lists into the 'ids' property like this:
399 # # this contains a list of alternative devices,
401 # # which are itself lists of ids for one multifunction device
403 # id => "0000:00:00.0",
407 # id => "0000:00:00.1",
422 my $res = PVE
::JSONSchema
::parse_property_string
($hostpci_fmt, $value);
424 my $alternatives = [];
425 my $host = delete $res->{host
};
426 my $mapping = delete $res->{mapping
};
428 die "Cannot set both 'host' and 'mapping'.\n" if defined($host) && defined($mapping);
431 # we have no ordinary pci id, must be a mapping
432 my $devices = PVE
::Mapping
::PCI
::find_on_current_node
($mapping);
433 die "PCI device mapping not found for '$mapping'\n" if !$devices || !scalar($devices->@*);
435 for my $device ($devices->@*) {
436 eval { PVE
::Mapping
::PCI
::assert_valid
($mapping, $device) };
437 die "PCI device mapping invalid (hardware probably changed): $@\n" if $@;
438 push $alternatives->@*, [split(/;/, $device->{path
})];
441 push $alternatives->@*, [split(/;/, $host)];
443 die "Either 'host' or 'mapping' must be set.\n";
447 for my $alternative ($alternatives->@*) {
449 foreach my $id ($alternative->@*) {
450 my $devs = PVE
::SysFSTools
::lspci
($id);
451 die "no PCI device found for '$id'\n" if !scalar($devs->@*);
452 push $ids->@*, @$devs;
454 if (scalar($ids->@*) > 1) {
455 $res->{'has-multifunction'} = 1;
456 die "cannot use mediated device with multifunction device\n" if $res->{mdev
};
458 push $res->{ids
}->@*, $ids;
464 # parses all hostpci devices from a config and does some sanity checks
465 # returns a hash like this:
468 # # hash from parse_hostpci function
470 # hostpci1 => { ... },
#
# Walks hostpci0 .. hostpci<$MAX_HOSTPCI_DEVICES - 1>, parses each config value
# with parse_hostpci() and stores the result in the returned hash under its
# 'hostpciN' key. Dies when a device requests 'pcie' on a non-q35 machine, or
# when a 'legacy-igd' device violates one of the restrictions spelled out in
# the die messages below (VGA must be 'none', rombar must not be disabled, no
# multifunction device, no alternative devices, id must end in '02.0', ...).
# NOTE(review): the conditions guarding several of the legacy-igd messages are
# not visible in this view.
473 sub parse_hostpci_devices
{
476 my $q35 = PVE
::QemuServer
::Machine
::machine_type_is_q35
($conf);
479 my $parsed_devices = {};
480 for (my $i = 0; $i < $MAX_HOSTPCI_DEVICES; $i++) {
481 my $id = "hostpci$i";
482 my $d = parse_hostpci
($conf->{$id});
# NOTE(review): unlike every other message in this sub, this one has no
# trailing newline, so Perl appends " at <file> line <n>." to it.
486 die "q35 machine model is not enabled" if !$q35 && $d->{pcie
};
488 if ($d->{'legacy-igd'}) {
489 die "only one device can be assigned in legacy-igd mode\n"
493 die "legacy IGD assignment requires VGA mode to be 'none'\n"
494 if !defined($conf->{'vga'}) || $conf->{'vga'} ne 'none';
495 die "legacy IGD assignment requires rombar to be enabled\n"
496 if defined($d->{rombar
}) && !$d->{rombar
};
497 die "legacy IGD assignment is not compatible with x-vga\n"
499 die "legacy IGD assignment is not compatible with mdev\n"
501 die "legacy IGD assignment is not compatible with q35\n"
503 die "legacy IGD assignment is not compatible with multifunction devices\n"
504 if $d->{'has-multifunction'};
505 die "legacy IGD assignment is not compatible with alternate devices\n"
506 if scalar($d->{ids
}->@*) > 1;
# NOTE(review): the pattern below only anchors the trailing "02.0"
# (device.function); it does not test the bus part, so an id such as
# "0000:01:02.0" passes even though the message talks about bus 00.
507 # check first device for valid id
508 die "legacy IGD assignment only works for devices on host bus 00:02.0\n"
509 if $d->{ids
}->[0]->[0]->{id
} !~ m/02\.0$/;
512 $parsed_devices->{$id} = $d;
515 return $parsed_devices;
518 # takes the hash returned by parse_hostpci_devices and for all non mdev gpus,
519 # selects one of the given alternatives by trying to reserve it
521 # mdev devices must be chosen later when we actually allocate it, but we
522 # flatten the inner list since there can only be one device per alternative anyway
#
# Lexical ('my sub') helper, so it is only callable from code that follows it
# in this file. Takes ($devices, $vmid) and rewrites each $device->{ids} in
# place: afterwards it holds the single chosen alternative (a flat list of
# device hashes) instead of a list of alternatives. Dies when a PCI id would be
# assigned twice or when no free alternative is left for a slot.
523 my sub choose_hostpci_devices
{
524 my ($devices, $vmid) = @_;
# records every id of the given device list in $used and dies on duplicates
528 my $add_used_device = sub {
530 for my $used_device ($devices->@*) {
531 my $used_id = $used_device->{id
};
532 die "device '$used_id' assigned more than once\n" if $used->{$used_id};
533 $used->{$used_id} = 1;
537 for (my $i = 0; $i < $MAX_HOSTPCI_DEVICES; $i++) {
538 my $device = $devices->{"hostpci$i"};
541 if ($device->{mdev
}) {
542 $device->{ids
} = [ map { $_->[0] } $device->{ids
}->@* ];
# NOTE(review): the closing paren of scalar() sits after '== 1', so it is the
# '==' itself that evaluates the array in numeric context (its element count);
# the outcome is the same as scalar(...) == 1.
546 if (scalar($device->{ids
}->@* == 1)) {
547 # we only have one alternative, use that
548 $device->{ids
} = $device->{ids
}->[0];
549 $add_used_device->($device->{ids
});
554 for my $alternative ($device->{ids
}->@*) {
555 my $ids = [map { $_->{id
} } @$alternative];
557 next if grep { defined($used->{$_}) } @$ids; # already used
# try a time-based reservation (timeout argument 10, no PID yet); a failure is
# trapped by the eval
558 eval { reserve_pci_usage
($ids, $vmid, 10, undef) };
561 # found one that is not used or reserved
562 $add_used_device->($alternative);
563 $device->{ids
} = $alternative;
567 die "could not find a free device for 'hostpci$i'\n" if !$found;
# Appends the QEMU '-device' arguments for all configured hostpci devices to
# the @$devices array reference passed in by the caller.
#
# The devices are obtained via parse_hostpci_devices($conf) followed by
# choose_hostpci_devices(..., $vmid). For every slot hostpci0 ..
# hostpci<$MAX_HOSTPCI_DEVICES - 1> a "vfio-pci" device string is built per
# chosen PCI function and pushed onto @$devices. With 'pcie' set, the address
# comes from print_pcie_addr() (using the '<id>bus0' entry when $winversion is
# 7) and a root port device may be pushed as well; the other visible path uses
# print_pci_addr(), with 'legacy-igd' as the lookup name for legacy IGD devices.
#
# Side effect visible here: $vga->{type} is set to 'none' when the config has
# no 'vga' entry (in the branch that also sets $gpu_passthrough).
#
# Returns the list ($kvm_off, $gpu_passthrough, $legacy_igd, $pci_devices).
# NOTE(review): several branch conditions (x-vga, mdev, root port creation) and
# the initialisation of $kvm_off are not visible in this view.
573 sub print_hostpci_devices
{
574 my ($vmid, $conf, $devices, $vga, $winversion, $bridges, $arch, $machine_type, $bootorder) = @_;
577 my $gpu_passthrough = 0;
581 my $pci_devices = choose_hostpci_devices
(parse_hostpci_devices
($conf), $vmid);
583 for (my $i = 0; $i < $MAX_HOSTPCI_DEVICES; $i++) {
584 my $id = "hostpci$i";
585 my $d = $pci_devices->{$id};
588 $legacy_igd = 1 if $d->{'legacy-igd'};
590 if (my $pcie = $d->{pcie
}) {
591 # win7 wants to have the pcie devices directly on the pcie bus
592 # instead of in the root port
593 if ($winversion == 7) {
594 $pciaddr = print_pcie_addr
("${id}bus0");
596 # add more root ports if needed, 4 are present by default
597 # by pve-q35 cfgs, rest added here on demand.
599 push @$devices, '-device', print_pcie_root_port
($i);
601 $pciaddr = print_pcie_addr
($id);
604 my $pci_name = $d->{'legacy-igd'} ?
'legacy-igd' : $id;
605 $pciaddr = print_pci_addr
($pci_name, $bridges, $arch, $machine_type);
608 my $num_devices = scalar($d->{ids
}->@*);
# more than one function and not a mediated device => multifunction device
609 my $multifunction = $num_devices > 1 && !$d->{mdev
};
# x-vga=on is only added when the configured bios is not 'ovmf'
613 $xvga = ',x-vga=on' if !($conf->{bios
} && $conf->{bios
} eq 'ovmf');
615 $vga->{type
} = 'none' if !defined($conf->{vga
});
616 $gpu_passthrough = 1;
621 my $uuid = generate_mdev_uuid
($vmid, $i);
622 $sysfspath = "/sys/bus/mdev/devices/$uuid";
625 for (my $j = 0; $j < $num_devices; $j++) {
626 my $pcidevice = $d->{ids
}->[$j];
627 my $devicestr = "vfio-pci";
630 $devicestr .= ",sysfsdev=$sysfspath";
632 $devicestr .= ",host=$pcidevice->{id}";
# for multifunction devices the function index $j is appended both to the
# device id and to the PCI address
635 my $mf_addr = $multifunction ?
".$j" : '';
636 $devicestr .= ",id=${id}${mf_addr}${pciaddr}${mf_addr}";
639 $devicestr .= ',rombar=0' if defined($d->{rombar
}) && !$d->{rombar
};
640 $devicestr .= "$xvga";
641 $devicestr .= ",multifunction=on" if $multifunction;
642 $devicestr .= ",romfile=/usr/share/kvm/$d->{romfile}" if $d->{romfile
};
643 $devicestr .= ",bootindex=$bootorder->{$id}" if $bootorder->{$id};
# optional overrides of the PCI ids the guest sees, passed as x-pci-* options
644 for my $option (qw(vendor-id device-id sub-vendor-id sub-device-id)) {
645 $devicestr .= ",x-pci-$option=$d->{$option}" if $d->{$option};
650 push @$devices, '-device', $devicestr;
655 return ($kvm_off, $gpu_passthrough, $legacy_igd, $pci_devices);
# Prepares the host PCI device $pciid for pass-through to VM $vmid.
#
# Dies when the host has no IOMMU support or when no device info can be read
# for $pciid. The visible steps after that are: creating a mediated device of
# type $mdev under the UUID from generate_mdev_uuid($vmid, $index), binding the
# device's PCI group to VFIO, and resetting the device when its info has
# 'has_fl_reset' set; a failing bind or reset is fatal.
# NOTE(review): the conditions choosing between the mdev step and the
# bind/reset steps are not visible here - presumably the former runs only when
# $mdev is set; confirm.
658 sub prepare_pci_device
{
659 my ($vmid, $pciid, $index, $mdev) = @_;
661 my $info = PVE
::SysFSTools
::pci_device_info
("$pciid");
662 die "cannot prepare PCI pass-through, IOMMU not present\n" if !PVE
::SysFSTools
::check_iommu_support
();
663 die "no pci device info for device '$pciid'\n" if !$info;
666 my $uuid = generate_mdev_uuid
($vmid, $index);
667 PVE
::SysFSTools
::pci_create_mdev_device
($pciid, $uuid, $mdev);
669 die "can't unbind/bind PCI group to VFIO '$pciid'\n"
670 if !PVE
::SysFSTools
::pci_dev_group_bind_to_vfio
($pciid);
671 die "can't reset PCI device '$pciid'\n"
672 if $info->{has_fl_reset
} && !PVE
::SysFSTools
::pci_dev_reset
($info);
678 my $RUNDIR = '/run/qemu-server';
679 my $PCIID_RESERVATION_FILE = "${RUNDIR}/pci-id-reservations";
680 my $PCIID_RESERVATION_LOCK = "${PCIID_RESERVATION_FILE}.lock";
682 # a list of PCI ID to VMID reservations, the validity is protected against leakage by either a PID,
683 # for successfully started VM processes, or an expiration time for the initial time window between
684 # reservation and actual VM process start-up.
#
# Reads $PCIID_RESERVATION_FILE line by line. A line is only recognised when it
# has the form "<pci-id> <vmid> time:<number>" or "<pci-id> <vmid> pid:<number>"
# with exactly one whitespace character between the fields. A file that cannot
# be opened is not treated as an error by the visible code.
# As the name says, no locking happens here; the callers in this file invoke it
# from inside a PVE::Tools::lock_file() callback.
685 my $parse_pci_reservation_unlocked = sub {
687 if (my $fh = IO
::File-
>new($PCIID_RESERVATION_FILE, "r")) {
688 while (my $line = <$fh>) {
689 if ($line =~ m/^($PCIRE)\s(\d+)\s(time|pid)\:(\d+)$/) {
# Serialises the reservation hash ($pci_id => { vmid, pid, time }) and writes
# it to $PCIID_RESERVATION_FILE via PVE::Tools::file_set_contents(). Entries
# are emitted sorted by PCI id, one per line, either as "<pci-id> <vmid>
# pid:<pid>" or as "<pci-id> <vmid> time:<time>" - the same format the parser
# accepts. No locking happens here; callers are expected to hold the lock.
# NOTE(review): the condition selecting the pid or the time form is not visible
# here - presumably the pid form is used when a pid is defined; confirm.
700 my $write_pci_reservation_unlocked = sub {
701 my ($reservations) = @_;
704 for my $pci_id (sort keys $reservations->%*) {
705 my ($vmid, $pid, $time) = $reservations->{$pci_id}->@{'vmid', 'pid', 'time'};
707 $data .= "$pci_id $vmid pid:$pid\n";
709 $data .= "$pci_id $vmid time:$time\n";
712 PVE
::Tools
::file_set_contents
($PCIID_RESERVATION_FILE, $data);
715 # removes all PCI device reservations held by the `vmid`
#
# Runs inside PVE::Tools::lock_file() on $PCIID_RESERVATION_LOCK: re-reads the
# reservation file, deletes every entry whose vmid equals $vmid (numeric
# comparison) and writes the remaining entries back.
# NOTE(review): the second lock_file() argument (2) is presumably the lock
# timeout in seconds; how a lock failure is reported is not visible here.
716 sub remove_pci_reservation
{
719 PVE
::Tools
::lock_file
($PCIID_RESERVATION_LOCK, 2, sub {
720 my $reservation_list = $parse_pci_reservation_unlocked->();
721 for my $id (keys %$reservation_list) {
722 my $reservation = $reservation_list->{$id};
723 next if $reservation->{vmid
} != $vmid;
724 delete $reservation_list->{$id};
726 $write_pci_reservation_unlocked->($reservation_list);
731 sub reserve_pci_usage
{
732 my ($requested_ids, $vmid, $timeout, $pid) = @_;
734 $requested_ids = [ $requested_ids ] if !ref($requested_ids);
735 return if !scalar(@$requested_ids); # do nothing for empty list
737 PVE
::Tools
::lock_file
($PCIID_RESERVATION_LOCK, 5, sub {
738 my $reservation_list = $parse_pci_reservation_unlocked->();
741 for my $id ($requested_ids->@*) {
742 my $reservation = $reservation_list->{$id};
743 if ($reservation && $reservation->{vmid
} != $vmid) {
744 # check time based reservation
745 die "PCI device '$id' is currently reserved for use by VMID '$reservation->{vmid}'\n"
746 if defined($reservation->{time}) && $reservation->{time} > $ctime;
748 if (my $reserved_pid = $reservation->{pid
}) {
750 my $running_pid = PVE
::QemuServer
::Helpers
::vm_running_locally
($reservation->{vmid
});
751 if (defined($running_pid) && $running_pid == $reserved_pid) {
752 die "PCI device '$id' already in use by VMID '$reservation->{vmid}'\n";
754 warn "leftover PCI reservation found for $id, lets take it...\n";
757 } elsif ($reservation) {
758 # already reserved by the same vmid
759 if (my $reserved_time = $reservation->{time}) {
760 if (defined($timeout)) {
761 # use the longer timeout
762 my $old_timeout = $reservation->{time} - 5 - $ctime;
763 $timeout = $old_timeout if $old_timeout > $timeout;
765 } elsif (my $reserved_pid = $reservation->{pid
}) {
766 my $running_pid = PVE
::QemuServer
::Helpers
::vm_running_locally
($reservation->{vmid
});
767 if (defined($running_pid) && $running_pid == $reservation->{pid
}) {
769 die "PCI device '$id' already in use by running VMID '$reservation->{vmid}'\n";
770 } elsif (defined($timeout)) {
771 # ignore timeout reservation for running vms, can happen with e.g.
779 $reservation_list->{$id} = { vmid
=> $vmid };
780 if (defined($pid)) { # VM started up, we can reserve now with the actual PID
781 $reservation_list->{$id}->{pid
} = $pid;
782 } elsif (defined($timeout)) { # temporarily reserve as we don't know the PID yet
783 $reservation_list->{$id}->{time} = $ctime + $timeout + 5;
786 $write_pci_reservation_unlocked->($reservation_list);