1 package PVE
::QemuServer
::PCI
;
# Number of hostpciN slots: print_hostpci_devices iterates the indices
# 0 .. $MAX_HOSTPCI_DEVICES - 1 and builds the ids "hostpci$i" from them.
19 our $MAX_HOSTPCI_DEVICES = 16;
# Matches one host PCI address written in lowercase hex: an optional leading
# group of four hex digits plus ':', then two hex digits, ':', two hex digits,
# and an optional '.' followed by a single hex digit.
21 my $PCIRE = qr/(?:[a-f0-9]{4}:)?[a-f0-9]{2}:[a-f0-9]{2}(?:\.[a-f0-9])?/;
26 pattern
=> qr/$PCIRE(;$PCIRE)*/,
27 format_description
=> 'HOSTPCIID[;HOSTPCIID2...]',
28 description
=> <<EODESCR,
29 Host PCI device pass through. The PCI ID of a host's PCI device or a list
30 of PCI virtual functions of the host. HOSTPCIID syntax is:
32 'bus:dev.func' (hexadecimal numbers)
34 You can use the 'lspci' command to list existing PCI devices.
39 description
=> "Specify whether or not the device's ROM will be visible in the"
40 ." guest's memory map.",
47 format_description
=> 'string',
48 description
=> "Custom pci device rom filename (must be located in /usr/share/kvm/).",
53 description
=> "Choose the PCI-express bus (needs the 'q35' machine model).",
59 description
=> "Enable vfio-vga device support.",
65 description
=> "Pass this device in legacy IGD mode, making it the primary and exclusive"
66 ." graphics device in the VM. Requires 'pc-i440fx' machine type and VGA set to 'none'.",
72 format_description
=> 'string',
73 pattern
=> '[^/\.:]+',
75 description
=> <<EODESCR
76 The type of mediated device to use.
77 An instance of this type will be created on startup of the VM and
78 will be cleaned up when the VM stops.
83 pattern
=> qr/^0x[0-9a-fA-F]{4}$/,
84 format_description
=> 'hex id',
86 description
=> "Override PCI vendor ID visible to guest"
90 pattern
=> qr/^0x[0-9a-fA-F]{4}$/,
91 format_description
=> 'hex id',
93 description
=> "Override PCI device ID visible to guest"
97 pattern
=> qr/^0x[0-9a-fA-F]{4}$/,
98 format_description
=> 'hex id',
100 description
=> "Override PCI subsystem vendor ID visible to guest"
104 pattern
=> qr/^0x[0-9a-fA-F]{4}$/,
105 format_description
=> 'hex id',
107 description
=> "Override PCI subsystem device ID visible to guest"
# Hand $hostpci_fmt to PVE::JSONSchema::register_format under the format name
# 'pve-qm-hostpci'.
110 PVE
::JSONSchema
::register_format
('pve-qm-hostpci', $hostpci_fmt);
114 type
=> 'string', format
=> 'pve-qm-hostpci',
115 description
=> "Map host PCI devices into guest.",
116 verbose_description
=> <<EODESCR,
117 Map host PCI devices into guest.
119 NOTE: This option allows direct access to host hardware. So it is no longer
120 possible to migrate such machines - use with special care.
122 CAUTION: Experimental! User reported problems with this option.
# Hand $hostpcidesc to PVE::JSONSchema::register_standard_option under the
# option name "pve-qm-hostpci".
125 PVE
::JSONSchema
::register_standard_option
("pve-qm-hostpci", $hostpcidesc);
# Returns $pci_addr_map, the table of fixed PCI slot assignments keyed by
# device id.
#
# Every visible entry carries a numeric 'bus' and 'addr'. A few entries also
# carry 'conflict_ok', naming the one other id that is listed with the same
# bus/addr pair (piix3/ehci, vga/legacy-igd, scsihw0/pci.3).
# The table literal is guarded by `if !defined($pci_addr_map)`.
# NOTE(review): the statement that opens the table literal is not visible in
# this excerpt; presumably it assigns the hash ref to $pci_addr_map - confirm.
128 sub get_pci_addr_map
{
130 piix3
=> { bus
=> 0, addr
=> 1, conflict_ok
=> qw(ehci) },
131 ehci
=> { bus
=> 0, addr
=> 1, conflict_ok
=> qw(piix3) }, # instead of piix3 on arm
132 vga
=> { bus
=> 0, addr
=> 2, conflict_ok
=> qw(legacy-igd) },
133 'legacy-igd' => { bus
=> 0, addr
=> 2, conflict_ok
=> qw(vga) }, # legacy-igd requires vga=none
134 balloon0
=> { bus
=> 0, addr
=> 3 },
135 watchdog
=> { bus
=> 0, addr
=> 4 },
136 scsihw0
=> { bus
=> 0, addr
=> 5, conflict_ok
=> qw(pci.3) },
137 'pci.3' => { bus
=> 0, addr
=> 5, conflict_ok
=> qw(scsihw0) }, # also used for virtio-scsi-single bridge
138 scsihw1
=> { bus
=> 0, addr
=> 6 },
139 ahci0
=> { bus
=> 0, addr
=> 7 },
140 qga0
=> { bus
=> 0, addr
=> 8 },
141 spice
=> { bus
=> 0, addr
=> 9 },
142 virtio0
=> { bus
=> 0, addr
=> 10 },
143 virtio1
=> { bus
=> 0, addr
=> 11 },
144 virtio2
=> { bus
=> 0, addr
=> 12 },
145 virtio3
=> { bus
=> 0, addr
=> 13 },
146 virtio4
=> { bus
=> 0, addr
=> 14 },
147 virtio5
=> { bus
=> 0, addr
=> 15 },
148 hostpci0
=> { bus
=> 0, addr
=> 16 },
149 hostpci1
=> { bus
=> 0, addr
=> 17 },
150 net0
=> { bus
=> 0, addr
=> 18 },
151 net1
=> { bus
=> 0, addr
=> 19 },
152 net2
=> { bus
=> 0, addr
=> 20 },
153 net3
=> { bus
=> 0, addr
=> 21 },
154 net4
=> { bus
=> 0, addr
=> 22 },
155 net5
=> { bus
=> 0, addr
=> 23 },
156 vga1
=> { bus
=> 0, addr
=> 24 },
157 vga2
=> { bus
=> 0, addr
=> 25 },
158 vga3
=> { bus
=> 0, addr
=> 26 },
159 hostpci2
=> { bus
=> 0, addr
=> 27 },
160 hostpci3
=> { bus
=> 0, addr
=> 28 },
161 #addr29 : usb-host (pve-usb.cfg)
162 'pci.1' => { bus
=> 0, addr
=> 30 },
163 'pci.2' => { bus
=> 0, addr
=> 31 },
164 'net6' => { bus
=> 1, addr
=> 1 },
165 'net7' => { bus
=> 1, addr
=> 2 },
166 'net8' => { bus
=> 1, addr
=> 3 },
167 'net9' => { bus
=> 1, addr
=> 4 },
168 'net10' => { bus
=> 1, addr
=> 5 },
169 'net11' => { bus
=> 1, addr
=> 6 },
170 'net12' => { bus
=> 1, addr
=> 7 },
171 'net13' => { bus
=> 1, addr
=> 8 },
172 'net14' => { bus
=> 1, addr
=> 9 },
173 'net15' => { bus
=> 1, addr
=> 10 },
174 'net16' => { bus
=> 1, addr
=> 11 },
175 'net17' => { bus
=> 1, addr
=> 12 },
176 'net18' => { bus
=> 1, addr
=> 13 },
177 'net19' => { bus
=> 1, addr
=> 14 },
178 'net20' => { bus
=> 1, addr
=> 15 },
179 'net21' => { bus
=> 1, addr
=> 16 },
180 'net22' => { bus
=> 1, addr
=> 17 },
181 'net23' => { bus
=> 1, addr
=> 18 },
182 'net24' => { bus
=> 1, addr
=> 19 },
183 'net25' => { bus
=> 1, addr
=> 20 },
184 'net26' => { bus
=> 1, addr
=> 21 },
185 'net27' => { bus
=> 1, addr
=> 22 },
186 'net28' => { bus
=> 1, addr
=> 23 },
187 'net29' => { bus
=> 1, addr
=> 24 },
188 'net30' => { bus
=> 1, addr
=> 25 },
189 'net31' => { bus
=> 1, addr
=> 26 },
190 'xhci' => { bus
=> 1, addr
=> 27 },
191 'pci.4' => { bus
=> 1, addr
=> 28 },
192 'rng0' => { bus
=> 1, addr
=> 29 },
193 'pci.2-igd' => { bus
=> 1, addr
=> 30 }, # replaces pci.2 in case a legacy IGD device is passed through
194 'virtio6' => { bus
=> 2, addr
=> 1 },
195 'virtio7' => { bus
=> 2, addr
=> 2 },
196 'virtio8' => { bus
=> 2, addr
=> 3 },
197 'virtio9' => { bus
=> 2, addr
=> 4 },
198 'virtio10' => { bus
=> 2, addr
=> 5 },
199 'virtio11' => { bus
=> 2, addr
=> 6 },
200 'virtio12' => { bus
=> 2, addr
=> 7 },
201 'virtio13' => { bus
=> 2, addr
=> 8 },
202 'virtio14' => { bus
=> 2, addr
=> 9 },
203 'virtio15' => { bus
=> 2, addr
=> 10 },
204 'ivshmem' => { bus
=> 2, addr
=> 11 },
205 'audio0' => { bus
=> 2, addr
=> 12 },
206 hostpci4
=> { bus
=> 2, addr
=> 13 },
207 hostpci5
=> { bus
=> 2, addr
=> 14 },
208 hostpci6
=> { bus
=> 2, addr
=> 15 },
209 hostpci7
=> { bus
=> 2, addr
=> 16 },
210 hostpci8
=> { bus
=> 2, addr
=> 17 },
211 hostpci9
=> { bus
=> 2, addr
=> 18 },
212 hostpci10
=> { bus
=> 2, addr
=> 19 },
213 hostpci11
=> { bus
=> 2, addr
=> 20 },
214 hostpci12
=> { bus
=> 2, addr
=> 21 },
215 hostpci13
=> { bus
=> 2, addr
=> 22 },
216 hostpci14
=> { bus
=> 2, addr
=> 23 },
217 hostpci15
=> { bus
=> 2, addr
=> 24 },
218 'virtioscsi0' => { bus
=> 3, addr
=> 1 },
219 'virtioscsi1' => { bus
=> 3, addr
=> 2 },
220 'virtioscsi2' => { bus
=> 3, addr
=> 3 },
221 'virtioscsi3' => { bus
=> 3, addr
=> 4 },
222 'virtioscsi4' => { bus
=> 3, addr
=> 5 },
223 'virtioscsi5' => { bus
=> 3, addr
=> 6 },
224 'virtioscsi6' => { bus
=> 3, addr
=> 7 },
225 'virtioscsi7' => { bus
=> 3, addr
=> 8 },
226 'virtioscsi8' => { bus
=> 3, addr
=> 9 },
227 'virtioscsi9' => { bus
=> 3, addr
=> 10 },
228 'virtioscsi10' => { bus
=> 3, addr
=> 11 },
229 'virtioscsi11' => { bus
=> 3, addr
=> 12 },
230 'virtioscsi12' => { bus
=> 3, addr
=> 13 },
231 'virtioscsi13' => { bus
=> 3, addr
=> 14 },
232 'virtioscsi14' => { bus
=> 3, addr
=> 15 },
233 'virtioscsi15' => { bus
=> 3, addr
=> 16 },
234 'virtioscsi16' => { bus
=> 3, addr
=> 17 },
235 'virtioscsi17' => { bus
=> 3, addr
=> 18 },
236 'virtioscsi18' => { bus
=> 3, addr
=> 19 },
237 'virtioscsi19' => { bus
=> 3, addr
=> 20 },
238 'virtioscsi20' => { bus
=> 3, addr
=> 21 },
239 'virtioscsi21' => { bus
=> 3, addr
=> 22 },
240 'virtioscsi22' => { bus
=> 3, addr
=> 23 },
241 'virtioscsi23' => { bus
=> 3, addr
=> 24 },
242 'virtioscsi24' => { bus
=> 3, addr
=> 25 },
243 'virtioscsi25' => { bus
=> 3, addr
=> 26 },
244 'virtioscsi26' => { bus
=> 3, addr
=> 27 },
245 'virtioscsi27' => { bus
=> 3, addr
=> 28 },
246 'virtioscsi28' => { bus
=> 3, addr
=> 29 },
247 'virtioscsi29' => { bus
=> 3, addr
=> 30 },
248 'virtioscsi30' => { bus
=> 3, addr
=> 31 },
249 'scsihw2' => { bus
=> 4, addr
=> 1 },
250 'scsihw3' => { bus
=> 4, addr
=> 2 },
251 'scsihw4' => { bus
=> 4, addr
=> 3 },
252 } if !defined($pci_addr_map);
253 return $pci_addr_map;
# Builds the UUID-shaped string used for a mediated device.
# Layout: $index zero-padded to 8 digits, three literal '0000' groups, then
# $vmid zero-padded to 12 digits; both values are formatted with %d.
# The visible callers pass the VM id and the hostpci index.
256 my sub generate_mdev_uuid
{
257 my ($vmid, $index) = @_;
258 return sprintf("%08d-0000-0000-0000-%012d", $index, $vmid);
# Helper closure, called by the visible callers as ->($map, $id).
# Returns nothing when $d, or its 'bus' or 'addr', is undefined; otherwise
# returns a new hash ref with 'bus' copied as-is and 'addr' rendered as a
# "0x"-prefixed lowercase hex string.
# NOTE(review): the lines that unpack the arguments and set $d are not visible
# in this excerpt; presumably $d is the $map entry for $id - confirm.
261 my $get_addr_mapping_from_id = sub {
265 return if !defined($d) || !defined($d->{bus
}) || !defined($d->{addr
});
267 return { bus
=> $d->{bus
}, addr
=> sprintf("0x%x", $d->{addr
}) };
# NOTE(review): the enclosing sub's header is not visible in this excerpt. The
# argument list matches the print_pci_addr(...) call made from
# print_hostpci_devices, so this is presumably its body - confirm.
271 my ($id, $bridges, $arch, $machine) = @_;
275 # using same bus slots on all HW, so we need to check special cases here:
277 if ($arch eq 'aarch64' && $machine =~ /^virt/) {
# Ids starting with "ide" are rejected outright for aarch64 'virt' machines.
278 die "aarch64/virt cannot use IDE devices\n" if $id =~ /^ide
/;
# Look the id up in the static PCI slot table.
282 my $map = get_pci_addr_map
();
283 if (my $d = $get_addr_mapping_from_id->($map, $id)) {
# Device-string fragment: bus is "$busname.<bus number>", addr is the hex
# string produced by the mapping helper. $busname is set outside the
# visible lines.
284 $res = ",bus=$busname.$d->{bus},addr=$d->{addr}";
# Record the bus number in the caller-supplied $bridges hash, if one was given.
285 $bridges->{$d->{bus
}} = 1 if $bridges;
# Returns $pcie_addr_map, the table of fixed PCIe placements keyed by device id.
#
# Every visible entry carries a 'bus' name (string) and a numeric 'addr':
#  - hostpci0 .. hostpci15 sit at addr 0 of "ich9-pcie-port-1" .. "-16"
#  - the "<id>bus0" variants, plus vga and ivshmem, sit directly on "pcie.0"
# The table literal is guarded by `if !defined($pcie_addr_map)`.
# NOTE(review): the statement that opens the table literal is not visible in
# this excerpt; presumably it assigns the hash ref to $pcie_addr_map - confirm.
292 sub get_pcie_addr_map
{
294 vga
=> { bus
=> 'pcie.0', addr
=> 1 },
295 hostpci0
=> { bus
=> "ich9-pcie-port-1", addr
=> 0 },
296 hostpci1
=> { bus
=> "ich9-pcie-port-2", addr
=> 0 },
297 hostpci2
=> { bus
=> "ich9-pcie-port-3", addr
=> 0 },
298 hostpci3
=> { bus
=> "ich9-pcie-port-4", addr
=> 0 },
299 hostpci4
=> { bus
=> "ich9-pcie-port-5", addr
=> 0 },
300 hostpci5
=> { bus
=> "ich9-pcie-port-6", addr
=> 0 },
301 hostpci6
=> { bus
=> "ich9-pcie-port-7", addr
=> 0 },
302 hostpci7
=> { bus
=> "ich9-pcie-port-8", addr
=> 0 },
303 hostpci8
=> { bus
=> "ich9-pcie-port-9", addr
=> 0 },
304 hostpci9
=> { bus
=> "ich9-pcie-port-10", addr
=> 0 },
305 hostpci10
=> { bus
=> "ich9-pcie-port-11", addr
=> 0 },
306 hostpci11
=> { bus
=> "ich9-pcie-port-12", addr
=> 0 },
307 hostpci12
=> { bus
=> "ich9-pcie-port-13", addr
=> 0 },
308 hostpci13
=> { bus
=> "ich9-pcie-port-14", addr
=> 0 },
309 hostpci14
=> { bus
=> "ich9-pcie-port-15", addr
=> 0 },
310 hostpci15
=> { bus
=> "ich9-pcie-port-16", addr
=> 0 },
311 # win7 is picky about pcie assignments
312 hostpci0bus0
=> { bus
=> "pcie.0", addr
=> 16 },
313 hostpci1bus0
=> { bus
=> "pcie.0", addr
=> 17 },
314 hostpci2bus0
=> { bus
=> "pcie.0", addr
=> 18 },
315 hostpci3bus0
=> { bus
=> "pcie.0", addr
=> 19 },
316 ivshmem
=> { bus
=> 'pcie.0', addr
=> 20 },
317 hostpci4bus0
=> { bus
=> "pcie.0", addr
=> 9 },
318 hostpci5bus0
=> { bus
=> "pcie.0", addr
=> 10 },
319 hostpci6bus0
=> { bus
=> "pcie.0", addr
=> 11 },
320 hostpci7bus0
=> { bus
=> "pcie.0", addr
=> 12 },
321 hostpci8bus0
=> { bus
=> "pcie.0", addr
=> 13 },
322 hostpci9bus0
=> { bus
=> "pcie.0", addr
=> 14 },
323 hostpci10bus0
=> { bus
=> "pcie.0", addr
=> 15 },
324 hostpci11bus0
=> { bus
=> "pcie.0", addr
=> 21 },
325 hostpci12bus0
=> { bus
=> "pcie.0", addr
=> 22 },
326 hostpci13bus0
=> { bus
=> "pcie.0", addr
=> 23 },
327 hostpci14bus0
=> { bus
=> "pcie.0", addr
=> 24 },
328 hostpci15bus0
=> { bus
=> "pcie.0", addr
=> 25 },
329 } if !defined($pcie_addr_map);
331 return $pcie_addr_map;
# Builds the ",bus=<name>,addr=<hex>" device-string fragment for $id from the
# PCIe placement table.
# NOTE(review): argument unpacking and the return statement are not visible in
# this excerpt; presumably $res is returned - confirm.
334 sub print_pcie_addr
{
# $id is passed along here, although the visible part of get_pcie_addr_map
# does not read any argument.
339 my $map = get_pcie_addr_map
($id);
340 if (my $d = $get_addr_mapping_from_id->($map, $id)) {
341 $res = ",bus=$d->{bus},addr=$d->{addr}";
# Assembles a "pcie-root-port,..." device string for root port index $i.
# A string is only built when $root_port_addresses has an entry for $i. It is
# attached to bus pcie.0, with x-speed=16, x-width=32 and multifunction=on, and
# uses ${id} for the device id suffix, the port and the chassis.
# NOTE(review): the contents of $root_port_addresses, the derivation of $id
# from $i, and the return statement are not visible in this excerpt.
347 # Generates the device strings for additional pcie root ports. The first 4 pcie
348 # root ports are defined in the pve-q35*.cfg files.
349 sub print_pcie_root_port
{
353 my $root_port_addresses = {
368 if (defined($root_port_addresses->{$i})) {
370 $res = "pcie-root-port,id=ich9-pcie-port-${id}";
371 $res .= ",addr=$root_port_addresses->{$i}";
372 $res .= ",x-speed=16,x-width=32,multifunction=on,bus=pcie.0";
373 $res .= ",port=${id},chassis=${id}";
# NOTE(review): the enclosing sub's header is not visible in this excerpt.
# print_hostpci_devices calls parse_hostpci(...) and reads 'pciid' from the
# result, so this is presumably that sub's body - confirm.
# Parse the property string $value against $hostpci_fmt.
384 my $res = PVE
::JSONSchema
::parse_property_string
($hostpci_fmt, $value);
# The 'host' property may hold several ids separated by ';'.
386 my @idlist = split(/;/, $res->{host
});
# Resolve every id through PVE::SysFSTools::lspci. An id that yields no device
# is fatal; all returned devices are collected in $res->{pciid}.
388 foreach my $id (@idlist) {
389 my $devs = PVE
::SysFSTools
::lspci
($id);
390 die "no PCI device found for '$id'\n" if !scalar(@$devs);
391 push @{$res->{pciid
}}, @$devs;
# Builds the "-device vfio-pci,..." arguments for the hostpciN entries of $conf
# and pushes them onto @$devices.
#
# What the visible lines do:
#  - iterate hostpci0 .. hostpci($MAX_HOSTPCI_DEVICES - 1) and parse each config
#    value with parse_hostpci
#  - for entries with 'pcie' set: die unless $q35 is true; with $winversion == 7
#    the "${id}bus0" placement from print_pcie_addr is used, and the remaining
#    lines push a print_pcie_root_port($i) device and use print_pcie_addr($id)
#  - take the address from print_pci_addr, under the slot name 'legacy-igd'
#    when that flag is set and under $id otherwise
#  - enforce the legacy-igd restrictions spelled out in the die messages
#  - emit one vfio-pci device string per entry of $d->{pciid}
# Returns the list ($kvm_off, $gpu_passthrough, $legacy_igd).
# NOTE(review): many lines of this sub (else branches, several conditions, and
# the declarations of $pciaddr, $xvga, $sysfspath, $j, $kvm_off, $legacy_igd)
# are not visible in this excerpt, so the exact branch structure is unconfirmed.
396 sub print_hostpci_devices
{
397 my ($vmid, $conf, $devices, $vga, $winversion, $q35, $bridges, $arch, $machine_type, $bootorder) = @_;
400 my $gpu_passthrough = 0;
404 for (my $i = 0; $i < $MAX_HOSTPCI_DEVICES; $i++) {
405 my $id = "hostpci$i";
406 my $d = parse_hostpci
($conf->{$id});
409 if (my $pcie = $d->{pcie
}) {
410 die "q35 machine model is not enabled" if !$q35;
411 # win7 wants to have the pcie devices directly on the pcie bus
412 # instead of in the root port
413 if ($winversion == 7) {
414 $pciaddr = print_pcie_addr
("${id}bus0");
416 # add more root ports if needed, 4 are present by default
417 # by pve-q35 cfgs, rest added here on demand.
419 push @$devices, '-device', print_pcie_root_port
($i);
421 $pciaddr = print_pcie_addr
($id);
# Legacy IGD devices use the dedicated 'legacy-igd' slot name instead of hostpciN.
424 my $pci_name = $d->{'legacy-igd'} ?
'legacy-igd' : $id;
425 $pciaddr = print_pci_addr
($pci_name, $bridges, $arch, $machine_type);
428 my $pcidevices = $d->{pciid
};
# More than one resolved device means the entry is emitted as multifunction.
429 my $multifunction = @$pcidevices > 1;
431 if ($d->{'legacy-igd'}) {
432 die "only one device can be assigned in legacy-igd mode\n"
436 die "legacy IGD assignment requires VGA mode to be 'none'\n"
437 if !defined($conf->{'vga'}) || $conf->{'vga'} ne 'none';
438 die "legacy IGD assignment requires rombar to be enabled\n"
439 if defined($d->{rombar
}) && !$d->{rombar
};
440 die "legacy IGD assignment is not compatible with x-vga\n"
442 die "legacy IGD assignment is not compatible with mdev\n"
444 die "legacy IGD assignment is not compatible with q35\n"
446 die "legacy IGD assignment is not compatible with multifunction devices\n"
448 die "legacy IGD assignment only works for devices on host bus 00:02.0\n"
449 if $pcidevices->[0]->{id
} !~ m/02\.0$/;
# x-vga=on is only added when the bios setting is not 'ovmf'.
454 $xvga = ',x-vga=on' if !($conf->{bios
} && $conf->{bios
} eq 'ovmf');
# Without an explicit vga setting in the config, the VGA type is forced to 'none'.
456 $vga->{type
} = 'none' if !defined($conf->{vga
});
457 $gpu_passthrough = 1;
# A mediated device is only set up for a single resolved PCI device; its sysfs
# path is built from the PCI id and the generated UUID.
461 if ($d->{mdev
} && scalar(@$pcidevices) == 1) {
462 my $pci_id = $pcidevices->[0]->{id
};
463 my $uuid = generate_mdev_uuid
($vmid, $i);
464 $sysfspath = "/sys/bus/pci/devices/$pci_id/$uuid";
465 } elsif ($d->{mdev
}) {
466 warn "ignoring mediated device '$id' with multifunction device\n";
470 foreach my $pcidevice (@$pcidevices) {
471 my $devicestr = "vfio-pci";
474 $devicestr .= ",sysfsdev=$sysfspath";
476 $devicestr .= ",host=$pcidevice->{id}";
# For multifunction entries, ".$j" is appended to both the device id and the address.
479 my $mf_addr = $multifunction ?
".$j" : '';
480 $devicestr .= ",id=${id}${mf_addr}${pciaddr}${mf_addr}";
483 $devicestr .= ',rombar=0' if defined($d->{rombar
}) && !$d->{rombar
};
484 $devicestr .= "$xvga";
485 $devicestr .= ",multifunction=on" if $multifunction;
486 $devicestr .= ",romfile=/usr/share/kvm/$d->{romfile}" if $d->{romfile
};
487 $devicestr .= ",bootindex=$bootorder->{$id}" if $bootorder->{$id};
# Optional guest-visible id overrides are passed through as x-pci-<option>.
488 for my $option (qw(vendor-id device-id sub-vendor-id sub-device-id)) {
489 $devicestr .= ",x-pci-$option=$d->{$option}" if $d->{$option};
493 push @$devices, '-device', $devicestr;
498 return ($kvm_off, $gpu_passthrough, $legacy_igd);
# Prepares host PCI device $pciid for pass-through to VM $vmid.
#
# Dies when PVE::SysFSTools::check_iommu_support() is false, or when
# pci_device_info() returns nothing for $pciid.
# The visible steps are:
#  - create a mediated device of type $mdev, with the UUID taken from
#    generate_mdev_uuid($vmid, $index)
#  - bind the device's group to VFIO, dying on failure
#  - reset the device when $info->{has_fl_reset} is set, dying on failure
# NOTE(review): the condition that selects between the mdev step and the
# bind/reset steps is not visible in this excerpt.
501 sub prepare_pci_device
{
502 my ($vmid, $pciid, $index, $mdev) = @_;
504 my $info = PVE
::SysFSTools
::pci_device_info
("$pciid");
505 die "cannot prepare PCI pass-through, IOMMU not present\n" if !PVE
::SysFSTools
::check_iommu_support
();
506 die "no pci device info for device '$pciid'\n" if !$info;
509 my $uuid = generate_mdev_uuid
($vmid, $index);
510 PVE
::SysFSTools
::pci_create_mdev_device
($pciid, $uuid, $mdev);
512 die "can't unbind/bind PCI group to VFIO '$pciid'\n"
513 if !PVE
::SysFSTools
::pci_dev_group_bind_to_vfio
($pciid);
514 die "can't reset PCI device '$pciid'\n"
515 if $info->{has_fl_reset
} && !PVE
::SysFSTools
::pci_dev_reset
($info);
# Directory that holds the PCI id reservation state.
519 my $RUNDIR = '/run/qemu-server';
# File the reservation helpers read and rewrite; one reservation per line.
520 my $PCIID_RESERVATION_FILE = "${RUNDIR}/pci-id-reservations";
# Lock file passed to PVE::Tools::lock_file by remove_pci_reservation and
# reserve_pci_usage.
521 my $PCIID_RESERVATION_LOCK = "${PCIID_RESERVATION_FILE}.lock";
# Reads $PCIID_RESERVATION_FILE without taking the lock itself; the visible
# callers invoke it from inside PVE::Tools::lock_file.
# The file is only read if IO::File can open it. Each accepted line has the
# form "<pci-id> <digits> time:<digits>" or "<pci-id> <digits> pid:<digits>",
# with exactly one whitespace character between the fields.
# NOTE(review): how the captures are stored and what is returned is not visible
# in this excerpt; callers treat the result as a hash ref keyed by PCI id.
523 my $parse_pci_reservation_unlocked = sub {
525 if (my $fh = IO
::File-
>new($PCIID_RESERVATION_FILE, "r")) {
526 while (my $line = <$fh>) {
527 if ($line =~ m/^($PCIRE)\s(\d+)\s(time|pid)\:(\d+)$/) {
# Serialises $reservations into $PCIID_RESERVATION_FILE through
# PVE::Tools::file_set_contents, without taking the lock itself.
# $reservations is a hash ref of PCI id => { vmid, pid, time }.
# Entries are written in sorted PCI id order, one per line, either as
# "<pci-id> <vmid> pid:<pid>" or as "<pci-id> <vmid> time:<time>".
# NOTE(review): the condition choosing between the pid and time form is not
# visible in this excerpt.
538 my $write_pci_reservation_unlocked = sub {
539 my ($reservations) = @_;
542 for my $pci_id (sort keys $reservations->%*) {
543 my ($vmid, $pid, $time) = $reservations->{$pci_id}->@{'vmid', 'pid', 'time'};
545 $data .= "$pci_id $vmid pid:$pid\n";
547 $data .= "$pci_id $vmid time:$time\n";
550 PVE
::Tools
::file_set_contents
($PCIID_RESERVATION_FILE, $data);
# Drops the reservations for the given PCI id(s).
# $dropped_ids is either a single id (non-reference) or an array ref of ids;
# an empty list is a no-op.
# The read-modify-write cycle runs inside PVE::Tools::lock_file on
# $PCIID_RESERVATION_LOCK. The second argument, 2, is presumably the lock
# timeout in seconds - confirm against PVE::Tools.
553 sub remove_pci_reservation
{
554 my ($dropped_ids) = @_;
556 $dropped_ids = [ $dropped_ids ] if !ref($dropped_ids);
557 return if !scalar(@$dropped_ids); # do nothing for empty list
559 PVE
::Tools
::lock_file
($PCIID_RESERVATION_LOCK, 2, sub {
560 my $reservation_list = $parse_pci_reservation_unlocked->();
# Hash-slice delete: removes all dropped ids in one go.
561 delete $reservation_list->@{$dropped_ids->@*};
562 $write_pci_reservation_unlocked->($reservation_list);
# Reserves the given PCI id(s) for VM $vmid in the reservation file.
#
# $requested_ids: a single id (non-reference) or an array ref of ids; an empty
#                 list is a no-op.
# $pid:           when defined, the reservation is stored with this pid.
# $timeout:       otherwise, when defined, the reservation is stored with the
#                 expiry time $ctime + $timeout + 5.
#
# Dies when a different VMID holds a time-based reservation that has not yet
# expired, or a pid-based reservation whose pid equals the pid reported by
# vm_running_locally() for that VMID. The visible lines also warn about a
# leftover reservation before the entry is overwritten.
# NOTE(review): $ctime is assigned outside the visible lines, and some branch
# keywords are missing from this excerpt, so the exact nesting is unconfirmed.
567 sub reserve_pci_usage
{
568 my ($requested_ids, $vmid, $timeout, $pid) = @_;
570 $requested_ids = [ $requested_ids ] if !ref($requested_ids);
571 return if !scalar(@$requested_ids); # do nothing for empty list
573 PVE
::Tools
::lock_file
($PCIID_RESERVATION_LOCK, 5, sub {
574 my $reservation_list = $parse_pci_reservation_unlocked->();
577 for my $id ($requested_ids->@*) {
578 my $reservation = $reservation_list->{$id};
579 if ($reservation && $reservation->{vmid
} != $vmid) {
580 # check time based reservation
581 die "PCI device '$id' is currently reserved for use by VMID '$reservation->{vmid}'\n"
582 if defined($reservation->{time}) && $reservation->{time} > $ctime;
584 if (my $reserved_pid = $reservation->{pid
}) {
586 my $running_pid = PVE
::QemuServer
::Helpers
::vm_running_locally
($reservation->{vmid
});
587 if (defined($running_pid) && $running_pid == $reserved_pid) {
588 die "PCI device '$id' already in use by VMID '$reservation->{vmid}'\n";
590 warn "leftover PCI reservation found for $id, lets take it...\n";
595 $reservation_list->{$id} = { vmid
=> $vmid };
596 if (defined($pid)) { # VM started up, we can reserve now with the actual PID
597 $reservation_list->{$id}->{pid
} = $pid;
598 } elsif (defined($timeout)) { # temporary reservation, as we don't know the PID yet
599 $reservation_list->{$id}->{time} = $ctime + $timeout + 5;
602 $write_pci_reservation_unlocked->($reservation_list);