]> git.proxmox.com Git - qemu-server.git/blob - PVE/QemuServer/PCI.pm
pci related code cleanups
[qemu-server.git] / PVE / QemuServer / PCI.pm
1 package PVE::QemuServer::PCI;
2
3 use warnings;
4 use strict;
5
6 use PVE::JSONSchema;
7 use PVE::SysFSTools;
8
9 use base 'Exporter';
10
# Functions that callers may import on request; nothing is exported by default.
our @EXPORT_OK = (
    'print_pci_addr',
    'print_pcie_addr',
    'print_pcie_root_port',
    'parse_hostpci',
);

# Number of 'hostpciN' config keys (hostpci0 .. hostpci15) scanned by print_hostpci_devices.
our $MAX_HOSTPCI_DEVICES = 16;
19
# One host PCI address: optional 4-digit domain, then 'bus:device' and an optional '.function',
# all as lowercase hexadecimal digits.
my $PCIRE = qr/(?:[a-f0-9]{4}:)?[a-f0-9]{2}:[a-f0-9]{2}(?:\.[a-f0-9])?/;

# Property-string schema for a 'hostpciN' value. 'host' is the default key, so it may be given
# without the 'host=' prefix; every other property is optional.
my $hostpci_fmt = {
    host => {
        default_key => 1,
        type => 'string',
        # one or more PCI addresses separated by ';'
        pattern => qr/$PCIRE(;$PCIRE)*/,
        format_description => 'HOSTPCIID[;HOSTPCIID2...]',
        description => <<EODESCR,
Host PCI device pass through. The PCI ID of a host's PCI device or a list
of PCI virtual functions of the host. HOSTPCIID syntax is:

'bus:dev.func' (hexadecimal numbers)

You can use the 'lspci' command to list existing PCI devices.
EODESCR
    },
    rombar => {
        type => 'boolean',
        description => "Specify whether or not the device's ROM will be visible in the"
            ." guest's memory map.",
        optional => 1,
        default => 1,
    },
    romfile => {
        type => 'string',
        pattern => '[^,;]+',
        format_description => 'string',
        description => "Custom pci device rom filename (must be located in /usr/share/kvm/).",
        optional => 1,
    },
    pcie => {
        type => 'boolean',
        description => "Choose the PCI-express bus (needs the 'q35' machine model).",
        optional => 1,
        default => 0,
    },
    'x-vga' => {
        type => 'boolean',
        description => "Enable vfio-vga device support.",
        optional => 1,
        default => 0,
    },
    'legacy-igd' => {
        type => 'boolean',
        description => "Pass this device in legacy IGD mode, making it the primary and exclusive"
            ." graphics device in the VM. Requires 'pc-i440fx' machine type and VGA set to 'none'.",
        optional => 1,
        default => 0,
    },
    'mdev' => {
        type => 'string',
        format_description => 'string',
        pattern => '[^/\.:]+',
        optional => 1,
        description => <<EODESCR
The type of mediated device to use.
An instance of this type will be created on startup of the VM and
will be cleaned up when the VM stops.
EODESCR
    }
};
# Make the schema available under the format name used by the 'hostpciN' option description.
PVE::JSONSchema::register_format('pve-qm-hostpci', $hostpci_fmt);
82
# Option description for a single 'hostpciN' config entry: an optional string whose content is a
# property string in the 'pve-qm-hostpci' format.
our $hostpcidesc = {
    type => 'string',
    format => 'pve-qm-hostpci',
    optional => 1,
    description => "Map host PCI devices into guest.",
    verbose_description => <<EODESCR,
Map host PCI devices into guest.

NOTE: This option allows direct access to host hardware. So it is no longer
possible to migrate such machines - use with special care.

CAUTION: Experimental! User reported problems with this option.
EODESCR
};

# Publish the description as a reusable standard option.
PVE::JSONSchema::register_standard_option("pve-qm-hostpci", $hostpcidesc);
97
# Lazily built table, shared by all callers once created.
my $pci_addr_map;

# Return the hashref mapping a device id (e.g. 'net0', 'virtio7') to its fixed location
# { bus => <bridge number>, addr => <slot> } on the plain PCI buses. Some entries carry a
# 'conflict_ok' key naming the one other id that shares the same slot.
# The table is built on the first call; later calls return the same hashref.
sub get_pci_addr_map {
    return $pci_addr_map if defined($pci_addr_map);

    # Entries that do not belong to a contiguous run of indexed ids.
    my $map = {
        # bus 0
        piix3 => { bus => 0, addr => 1, conflict_ok => 'ehci' },
        ehci => { bus => 0, addr => 1, conflict_ok => 'piix3' }, # instead of piix3 on arm
        vga => { bus => 0, addr => 2, conflict_ok => 'legacy-igd' },
        'legacy-igd' => { bus => 0, addr => 2, conflict_ok => 'vga' }, # legacy-igd requires vga=none
        balloon0 => { bus => 0, addr => 3 },
        watchdog => { bus => 0, addr => 4 },
        scsihw0 => { bus => 0, addr => 5, conflict_ok => 'pci.3' },
        'pci.3' => { bus => 0, addr => 5, conflict_ok => 'scsihw0' }, # also used for virtio-scsi-single bridge
        scsihw1 => { bus => 0, addr => 6 },
        ahci0 => { bus => 0, addr => 7 },
        qga0 => { bus => 0, addr => 8 },
        spice => { bus => 0, addr => 9 },
        virtio0 => { bus => 0, addr => 10 },
        virtio1 => { bus => 0, addr => 11 },
        virtio2 => { bus => 0, addr => 12 },
        virtio3 => { bus => 0, addr => 13 },
        virtio4 => { bus => 0, addr => 14 },
        virtio5 => { bus => 0, addr => 15 },
        hostpci0 => { bus => 0, addr => 16 },
        hostpci1 => { bus => 0, addr => 17 },
        net0 => { bus => 0, addr => 18 },
        net1 => { bus => 0, addr => 19 },
        net2 => { bus => 0, addr => 20 },
        net3 => { bus => 0, addr => 21 },
        net4 => { bus => 0, addr => 22 },
        net5 => { bus => 0, addr => 23 },
        vga1 => { bus => 0, addr => 24 },
        vga2 => { bus => 0, addr => 25 },
        vga3 => { bus => 0, addr => 26 },
        hostpci2 => { bus => 0, addr => 27 },
        hostpci3 => { bus => 0, addr => 28 },
        #addr29 : usb-host (pve-usb.cfg)
        'pci.1' => { bus => 0, addr => 30 },
        'pci.2' => { bus => 0, addr => 31 },

        # bus 1 (after net6 .. net31)
        'xhci' => { bus => 1, addr => 27 },
        'pci.4' => { bus => 1, addr => 28 },
        'rng0' => { bus => 1, addr => 29 },
        'pci.2-igd' => { bus => 1, addr => 30 }, # replaces pci.2 in case a legacy IGD device is passed through

        # bus 2 (between virtio6 .. virtio15 and hostpci4 .. hostpci15)
        'ivshmem' => { bus => 2, addr => 11 },
        'audio0' => { bus => 2, addr => 12 },
    };

    # Contiguous runs: [ id prefix, first index, last index, bus, slot of the first index ].
    # Each following index occupies the next slot on the same bus.
    my @runs = (
        ['net', 6, 31, 1, 1],          # net6 .. net31               -> bus 1, addr 1 .. 26
        ['virtio', 6, 15, 2, 1],       # virtio6 .. virtio15         -> bus 2, addr 1 .. 10
        ['hostpci', 4, 15, 2, 13],     # hostpci4 .. hostpci15       -> bus 2, addr 13 .. 24
        ['virtioscsi', 0, 30, 3, 1],   # virtioscsi0 .. virtioscsi30 -> bus 3, addr 1 .. 31
        ['scsihw', 2, 4, 4, 1],        # scsihw2 .. scsihw4          -> bus 4, addr 1 .. 3
    );
    for my $run (@runs) {
        my ($prefix, $first, $last, $bus, $first_addr) = @$run;
        for my $n ($first .. $last) {
            $map->{"$prefix$n"} = { bus => $bus, addr => $first_addr + ($n - $first) };
        }
    }

    $pci_addr_map = $map;
    return $pci_addr_map;
}
226
# Derive the UUID of the mediated device belonging to a VM's hostpci slot.
# The first group is the zero-padded hostpci index (8 digits), the last group the zero-padded
# VM id (12 digits); the three middle groups are always '0000'.
my sub generate_mdev_uuid {
    my ($vmid, $index) = @_;

    my $head = sprintf("%08d", $index);
    my $tail = sprintf("%012d", $vmid);

    return "$head-0000-0000-0000-$tail";
}
231
# Look up $id in an address map (hashref of id => { bus => ..., addr => ... }).
# Returns a new hashref { bus => <bus as stored>, addr => <addr formatted as 0x-prefixed hex> },
# or nothing when the id is unknown or its entry lacks 'bus' or 'addr'.
my $get_addr_mapping_from_id = sub {
    my ($map, $id) = @_;

    my $entry = $map->{$id};
    return if !defined($entry);

    my ($bus, $addr) = @$entry{qw(bus addr)};
    return if !(defined($bus) && defined($addr));

    return { bus => $bus, addr => sprintf("0x%x", $addr) };
};
240
# Build the ',bus=...,addr=...' suffix placing device $id at its fixed slot on a plain PCI bus.
#
# Parameters:
#   $id      - device id looked up in get_pci_addr_map() (e.g. 'net0', 'hostpci2')
#   $bridges - optional hashref; the bus number used is recorded in it as key => 1
#   $arch    - optional guest architecture
#   $machine - optional machine type
#
# Returns the suffix string, or '' when $id has no entry in the map.
# Dies for 'ide*' ids on an aarch64 'virt*' machine.
sub print_pci_addr {
    my ($id, $bridges, $arch, $machine) = @_;

    my $res = '';

    # using same bus slots on all HW, so we need to check special cases here:
    my $busname = 'pci';
    # arch/machine are optional: test for definedness first so that callers which omit them do
    # not trigger "uninitialized value" warnings.
    if (defined($arch) && $arch eq 'aarch64' && defined($machine) && $machine =~ /^virt/) {
        die "aarch64/virt cannot use IDE devices\n" if $id =~ /^ide/;
        $busname = 'pcie';
    }

    my $map = get_pci_addr_map();
    if (my $d = $get_addr_mapping_from_id->($map, $id)) {
        $res = ",bus=$busname.$d->{bus},addr=$d->{addr}";
        # remember which bus is in use so the caller can see it in $bridges
        $bridges->{$d->{bus}} = 1 if $bridges;
    }

    return $res;
}
261
# Lazily built table, shared by all callers once created.
my $pcie_addr_map;

# Return the hashref mapping a device id to its location { bus => <bus name>, addr => <slot> }
# for PCIe placement. It contains 'vga', 'ivshmem', 'hostpci0' .. 'hostpci15' (one root port
# each) and 'hostpci0bus0' .. 'hostpci15bus0' (directly on pcie.0).
# The table is built on the first call; later calls return the same hashref.
sub get_pcie_addr_map {
    return $pcie_addr_map if defined($pcie_addr_map);

    my $map = {
        vga => { bus => 'pcie.0', addr => 1 },
        ivshmem => { bus => 'pcie.0', addr => 20 },
    };

    # hostpciN sits at address 0 behind root port 'ich9-pcie-port-<N+1>'
    for my $n (0 .. 15) {
        $map->{"hostpci$n"} = { bus => "ich9-pcie-port-" . ($n + 1), addr => 0 };
    }

    # win7 is picky about pcie assignments
    # slot on pcie.0 for hostpci<N>bus0, indexed by N (slot 20 is taken by ivshmem)
    my @bus0_slots = (16, 17, 18, 19, 9, 10, 11, 12, 13, 14, 15, 21, 22, 23, 24, 25);
    for my $n (0 .. $#bus0_slots) {
        $map->{"hostpci${n}bus0"} = { bus => "pcie.0", addr => $bus0_slots[$n] };
    }

    $pcie_addr_map = $map;
    return $pcie_addr_map;
}
304
# Build the ',bus=...,addr=...' suffix placing device $id at its fixed PCIe location.
#
# Parameters:
#   $id - device id looked up in get_pcie_addr_map() (e.g. 'hostpci0', 'hostpci0bus0', 'vga')
#
# Returns the suffix string, or '' when $id has no entry in the map.
sub print_pcie_addr {
    my ($id) = @_;

    # get_pcie_addr_map takes no arguments; the id is only needed for the lookup itself
    my $d = $get_addr_mapping_from_id->(get_pcie_addr_map(), $id);
    return '' if !$d;

    return ",bus=$d->{bus},addr=$d->{addr}";
}
317
# Generates the device string for an additional pcie root port. The first 4 pcie
# root ports are defined in the pve-q35*.cfg files, so only indices 4 .. 15 yield a string.
#
# Parameters:
#   $i - zero-based root port index (matches the hostpci index it serves)
#
# Returns the 'pcie-root-port,...' device string, or '' for any other index.
sub print_pcie_root_port {
    my ($i) = @_;

    # 'slot.function' on pcie.0 for each additional root port
    my %addr_for = (
        4 => "10.0",
        5 => "10.1",
        6 => "10.2",
        7 => "10.3",
        8 => "10.4",
        9 => "10.5",
        10 => "10.6",
        11 => "10.7",
        12 => "11.0",
        13 => "11.1",
        14 => "11.2",
        15 => "11.3",
    );

    my $addr = $addr_for{$i};
    return '' if !defined($addr);

    # port ids are one-based: index N becomes ich9-pcie-port-<N+1>
    my $id = $i + 1;

    return join(',',
        "pcie-root-port",
        "id=ich9-pcie-port-${id}",
        "addr=$addr",
        "x-speed=16",
        "x-width=32",
        "multifunction=on",
        "bus=pcie.0",
        "port=${id}",
        "chassis=${id}",
    );
}
349
# Parse a 'hostpciN' config value.
#
# Parameters:
#   $value - property string in the hostpci format; a false value means "not configured"
#
# Returns nothing for a false $value. Otherwise returns the parsed property hashref in which the
# 'host' key has been replaced by 'pciid': an arrayref collecting the entries returned by
# PVE::SysFSTools::lspci for each ';'-separated id, in order.
# Dies when any listed id yields no device.
sub parse_hostpci {
    my ($value) = @_;

    return if !$value;

    my $res = PVE::JSONSchema::parse_property_string($hostpci_fmt, $value);

    # 'host' is only the lookup key; callers work with the resolved device list instead
    my $host = delete $res->{host};

    for my $id (split(/;/, $host)) {
        my $devs = PVE::SysFSTools::lspci($id);
        die "no PCI device found for '$id'\n" if !scalar(@$devs);
        push @{$res->{pciid}}, @$devs;
    }

    return $res;
}
366
# Append the '-device vfio-pci,...' arguments for every configured hostpciN entry to @$devices.
#
# Parameters:
#   $vmid         - VM id, used to derive mediated-device UUIDs
#   $conf         - VM config hashref; reads 'hostpci0' .. 'hostpci15', 'vga' and 'bios'
#   $devices      - arrayref of command line arguments, extended in place
#   $vga          - hashref; its 'type' is set to 'none' for x-vga devices when $conf->{vga} is unset
#   $winversion   - numeric Windows version; 7 selects direct placement on pcie.0
#   $q35          - true when the q35 machine model is in use
#   $bridges      - passed through to print_pci_addr
#   $arch         - passed through to print_pci_addr
#   $machine_type - passed through to print_pci_addr
#   $bootorder    - hashref of device id => bootindex
#
# Returns the list ($kvm_off, $gpu_passthrough, $legacy_igd); each is 1 when at least one device
# requested x-vga (first two) or legacy-igd (third), else 0.
# Dies on invalid combinations (see the individual messages below).
sub print_hostpci_devices {
    my ($vmid, $conf, $devices, $vga, $winversion, $q35, $bridges, $arch, $machine_type, $bootorder) = @_;

    my $kvm_off = 0;
    my $gpu_passthrough = 0;
    my $legacy_igd = 0;

    for my $i (0 .. $MAX_HOSTPCI_DEVICES - 1) {
        my $id = "hostpci$i";
        my $d = parse_hostpci($conf->{$id});
        next if !$d;

        # ',bus=...,addr=...' suffix for this entry; scoped per iteration so that a value from a
        # previous device can never leak into the next one
        my $pciaddr;
        if ($d->{pcie}) {
            # trailing newline keeps file/line out of the user-facing error, like all other dies here
            die "q35 machine model is not enabled\n" if !$q35;
            # win7 wants to have the pcie devices directly on the pcie bus
            # instead of in the root port
            if ($winversion == 7) {
                $pciaddr = print_pcie_addr("${id}bus0");
            } else {
                # add more root ports if needed, 4 are present by default
                # by pve-q35 cfgs, rest added here on demand.
                if ($i > 3) {
                    push @$devices, '-device', print_pcie_root_port($i);
                }
                $pciaddr = print_pcie_addr($id);
            }
        } else {
            # a legacy IGD device takes the slot otherwise used by the emulated VGA device
            my $pci_name = $d->{'legacy-igd'} ? 'legacy-igd' : $id;
            $pciaddr = print_pci_addr($pci_name, $bridges, $arch, $machine_type);
        }

        my $pcidevices = $d->{pciid};
        my $multifunction = @$pcidevices > 1;

        if ($d->{'legacy-igd'}) {
            die "only one device can be assigned in legacy-igd mode\n"
                if $legacy_igd;
            $legacy_igd = 1;

            die "legacy IGD assignment requires VGA mode to be 'none'\n"
                if !defined($conf->{'vga'}) || $conf->{'vga'} ne 'none';
            die "legacy IGD assignment requires rombar to be enabled\n"
                if defined($d->{rombar}) && !$d->{rombar};
            die "legacy IGD assignment is not compatible with x-vga\n"
                if $d->{'x-vga'};
            die "legacy IGD assignment is not compatible with mdev\n"
                if $d->{mdev};
            die "legacy IGD assignment is not compatible with q35\n"
                if $q35;
            die "legacy IGD assignment is not compatible with multifunction devices\n"
                if $multifunction;
            # NOTE(review): this only checks that the id ends in '02.0'; it does not verify that
            # the bus part is '00' as the message suggests - confirm whether that is intended.
            die "legacy IGD assignment only works for devices on host bus 00:02.0\n"
                if $pcidevices->[0]->{id} !~ m/02\.0$/;
        }

        my $xvga = '';
        if ($d->{'x-vga'}) {
            # the x-vga flag itself is left out when the VM uses the OVMF bios
            $xvga = ',x-vga=on' if !($conf->{bios} && $conf->{bios} eq 'ovmf');
            $kvm_off = 1;
            $vga->{type} = 'none' if !defined($conf->{vga});
            $gpu_passthrough = 1;
        }

        # mediated devices are referenced through their sysfs path instead of the host address
        my $sysfspath;
        if ($d->{mdev} && scalar(@$pcidevices) == 1) {
            my $pci_id = $pcidevices->[0]->{id};
            my $uuid = generate_mdev_uuid($vmid, $i);
            $sysfspath = "/sys/bus/pci/devices/$pci_id/$uuid";
        } elsif ($d->{mdev}) {
            warn "ignoring mediated device '$id' with multifunction device\n";
        }

        # one vfio-pci device per host function; $j is the guest function number
        my $j = 0;
        foreach my $pcidevice (@$pcidevices) {
            my $devicestr = "vfio-pci";

            if ($sysfspath) {
                $devicestr .= ",sysfsdev=$sysfspath";
            } else {
                $devicestr .= ",host=$pcidevice->{id}";
            }

            # for multifunction devices both the id and the address get a '.<function>' suffix
            my $mf_addr = $multifunction ? ".$j" : '';
            $devicestr .= ",id=${id}${mf_addr}${pciaddr}${mf_addr}";

            # device-wide options are only set on the first function
            if ($j == 0) {
                $devicestr .= ',rombar=0' if defined($d->{rombar}) && !$d->{rombar};
                $devicestr .= $xvga;
                $devicestr .= ",multifunction=on" if $multifunction;
                $devicestr .= ",romfile=/usr/share/kvm/$d->{romfile}" if $d->{romfile};
                $devicestr .= ",bootindex=$bootorder->{$id}" if $bootorder->{$id};
            }

            push @$devices, '-device', $devicestr;
            $j++;
        }
    }

    return ($kvm_off, $gpu_passthrough, $legacy_igd);
}
468
# Prepare a host PCI device for use by a VM.
#
# Parameters:
#   $vmid  - VM id, used to derive the mediated-device UUID
#   $pciid - host PCI id of the device
#   $index - hostpci index, used to derive the mediated-device UUID
#   $mdev  - mediated device type; when true an mdev instance is created instead of binding
#            the device to VFIO
#
# Dies when IOMMU support is missing, when no device info is available, or when binding to
# VFIO or resetting the device fails.
sub prepare_pci_device {
    my ($vmid, $pciid, $index, $mdev) = @_;

    # the device info is fetched first, but the IOMMU error takes precedence over a missing device
    my $info = PVE::SysFSTools::pci_device_info("$pciid");
    PVE::SysFSTools::check_iommu_support() or die "IOMMU not present\n";
    $info or die "no pci device info for device '$pciid'\n";

    if ($mdev) {
        PVE::SysFSTools::pci_create_mdev_device($pciid, generate_mdev_uuid($vmid, $index), $mdev);
    } else {
        PVE::SysFSTools::pci_dev_group_bind_to_vfio($pciid)
            or die "can't unbind/bind PCI group to VFIO '$pciid'\n";
        # a reset is only attempted when the device info reports has_fl_reset
        die "can't reset PCI device '$pciid'\n"
            if $info->{has_fl_reset} && !PVE::SysFSTools::pci_dev_reset($info);
    }
}
486
487 1;