]> git.proxmox.com Git - qemu-server.git/blame - PVE/QemuServer/PCI.pm
pci: refactor pci device preparation
[qemu-server.git] / PVE / QemuServer / PCI.pm
CommitLineData
de9768f0
DC
1package PVE::QemuServer::PCI;
2
41af2dfc
TL
3use warnings;
4use strict;
5
74c17b7a
SR
6use PVE::JSONSchema;
7use PVE::SysFSTools;
8
de9768f0
DC
9use base 'Exporter';
10
11our @EXPORT_OK = qw(
12print_pci_addr
13print_pcie_addr
c4e16381 14print_pcie_root_port
74c17b7a 15parse_hostpci
de9768f0
DC
16);
17
74c17b7a
SR
# Number of 'hostpciN' slots (hostpci0 .. hostpci15) scanned by
# print_hostpci_devices().
our $MAX_HOSTPCI_DEVICES = 16;

# A single host PCI address: optional 4-digit domain, 2-digit bus, 2-digit
# device and an optional 1-digit function, all lower-case hexadecimal.
my $PCIRE = qr/(?:[a-f0-9]{4}:)?[a-f0-9]{2}:[a-f0-9]{2}(?:\.[a-f0-9])?/;

# Property-string schema of a 'hostpciN' config value. 'host' is the default
# key and takes one or more PCI addresses separated by ';'.
my $hostpci_fmt = {
    host => {
        default_key => 1,
        type => 'string',
        pattern => qr/$PCIRE(;$PCIRE)*/,
        format_description => 'HOSTPCIID[;HOSTPCIID2...]',
        description => <<EODESCR,
Host PCI device pass through. The PCI ID of a host's PCI device or a list
of PCI virtual functions of the host. HOSTPCIID syntax is:

'bus:dev.func' (hexadecimal numbers)

You can use the 'lspci' command to list existing PCI devices.
EODESCR
    },
    rombar => {
        type => 'boolean',
        description => "Specify whether or not the device's ROM will be visible in the"
            ." guest's memory map.",
        optional => 1,
        default => 1,
    },
    romfile => {
        type => 'string',
        pattern => '[^,;]+',
        format_description => 'string',
        description => "Custom pci device rom filename (must be located in /usr/share/kvm/).",
        optional => 1,
    },
    pcie => {
        type => 'boolean',
        description => "Choose the PCI-express bus (needs the 'q35' machine model).",
        optional => 1,
        default => 0,
    },
    'x-vga' => {
        type => 'boolean',
        description => "Enable vfio-vga device support.",
        optional => 1,
        default => 0,
    },
    'legacy-igd' => {
        type => 'boolean',
        description => "Pass this device in legacy IGD mode, making it the primary and exclusive"
            ." graphics device in the VM. Requires 'pc-i440fx' machine type and VGA set to 'none'.",
        optional => 1,
        default => 0,
    },
    'mdev' => {
        type => 'string',
        format_description => 'string',
        pattern => '[^/\.:]+',
        optional => 1,
        description => <<EODESCR
The type of mediated device to use.
An instance of this type will be created on startup of the VM and
will be cleaned up when the VM stops.
EODESCR
    }
};
PVE::JSONSchema::register_format('pve-qm-hostpci', $hostpci_fmt);
82
# Option descriptor registered as the standard option 'pve-qm-hostpci'; its
# value is a property string validated against the 'pve-qm-hostpci' format.
our $hostpcidesc = {
    type => 'string',
    format => 'pve-qm-hostpci',
    optional => 1,
    description => "Map host PCI devices into guest.",
    verbose_description => <<EODESCR,
Map host PCI devices into guest.

NOTE: This option allows direct access to host hardware. So it is no longer
possible to migrate such machines - use with special care.

CAUTION: Experimental! User reported problems with this option.
EODESCR
};
PVE::JSONSchema::register_standard_option("pve-qm-hostpci", $hostpcidesc);
97
d7d698f6
TL
# Lazily built cache for get_pci_addr_map().
my $pci_addr_map;

# Returns the (cached) hash reference mapping a guest device id to its fixed
# slot on the legacy PCI buses: { bus => <bridge number>, addr => <slot> }.
#
# Two ids may deliberately share one slot; each of them then names the other
# one in 'conflict_ok'. That value is always a single plain string (never a
# list), so it cannot spill additional key/value pairs into the entry.
sub get_pci_addr_map {
    return $pci_addr_map if defined($pci_addr_map);

    $pci_addr_map = {
        piix3 => { bus => 0, addr => 1, conflict_ok => 'ehci' },
        ehci => { bus => 0, addr => 1, conflict_ok => 'piix3' }, # instead of piix3 on arm
        vga => { bus => 0, addr => 2, conflict_ok => 'legacy-igd' },
        'legacy-igd' => { bus => 0, addr => 2, conflict_ok => 'vga' }, # legacy-igd requires vga=none
        balloon0 => { bus => 0, addr => 3 },
        watchdog => { bus => 0, addr => 4 },
        scsihw0 => { bus => 0, addr => 5, conflict_ok => 'pci.3' },
        'pci.3' => { bus => 0, addr => 5, conflict_ok => 'scsihw0' }, # also used for virtio-scsi-single bridge
        scsihw1 => { bus => 0, addr => 6 },
        ahci0 => { bus => 0, addr => 7 },
        qga0 => { bus => 0, addr => 8 },
        spice => { bus => 0, addr => 9 },
        virtio0 => { bus => 0, addr => 10 },
        virtio1 => { bus => 0, addr => 11 },
        virtio2 => { bus => 0, addr => 12 },
        virtio3 => { bus => 0, addr => 13 },
        virtio4 => { bus => 0, addr => 14 },
        virtio5 => { bus => 0, addr => 15 },
        hostpci0 => { bus => 0, addr => 16 },
        hostpci1 => { bus => 0, addr => 17 },
        net0 => { bus => 0, addr => 18 },
        net1 => { bus => 0, addr => 19 },
        net2 => { bus => 0, addr => 20 },
        net3 => { bus => 0, addr => 21 },
        net4 => { bus => 0, addr => 22 },
        net5 => { bus => 0, addr => 23 },
        vga1 => { bus => 0, addr => 24 },
        vga2 => { bus => 0, addr => 25 },
        vga3 => { bus => 0, addr => 26 },
        hostpci2 => { bus => 0, addr => 27 },
        hostpci3 => { bus => 0, addr => 28 },
        #addr29 : usb-host (pve-usb.cfg)
        'pci.1' => { bus => 0, addr => 30 },
        'pci.2' => { bus => 0, addr => 31 },
        net6 => { bus => 1, addr => 1 },
        net7 => { bus => 1, addr => 2 },
        net8 => { bus => 1, addr => 3 },
        net9 => { bus => 1, addr => 4 },
        net10 => { bus => 1, addr => 5 },
        net11 => { bus => 1, addr => 6 },
        net12 => { bus => 1, addr => 7 },
        net13 => { bus => 1, addr => 8 },
        net14 => { bus => 1, addr => 9 },
        net15 => { bus => 1, addr => 10 },
        net16 => { bus => 1, addr => 11 },
        net17 => { bus => 1, addr => 12 },
        net18 => { bus => 1, addr => 13 },
        net19 => { bus => 1, addr => 14 },
        net20 => { bus => 1, addr => 15 },
        net21 => { bus => 1, addr => 16 },
        net22 => { bus => 1, addr => 17 },
        net23 => { bus => 1, addr => 18 },
        net24 => { bus => 1, addr => 19 },
        net25 => { bus => 1, addr => 20 },
        net26 => { bus => 1, addr => 21 },
        net27 => { bus => 1, addr => 22 },
        net28 => { bus => 1, addr => 23 },
        net29 => { bus => 1, addr => 24 },
        net30 => { bus => 1, addr => 25 },
        net31 => { bus => 1, addr => 26 },
        xhci => { bus => 1, addr => 27 },
        'pci.4' => { bus => 1, addr => 28 },
        rng0 => { bus => 1, addr => 29 },
        'pci.2-igd' => { bus => 1, addr => 30 }, # replaces pci.2 in case a legacy IGD device is passed through
        virtio6 => { bus => 2, addr => 1 },
        virtio7 => { bus => 2, addr => 2 },
        virtio8 => { bus => 2, addr => 3 },
        virtio9 => { bus => 2, addr => 4 },
        virtio10 => { bus => 2, addr => 5 },
        virtio11 => { bus => 2, addr => 6 },
        virtio12 => { bus => 2, addr => 7 },
        virtio13 => { bus => 2, addr => 8 },
        virtio14 => { bus => 2, addr => 9 },
        virtio15 => { bus => 2, addr => 10 },
        ivshmem => { bus => 2, addr => 11 },
        audio0 => { bus => 2, addr => 12 },
        hostpci4 => { bus => 2, addr => 13 },
        hostpci5 => { bus => 2, addr => 14 },
        hostpci6 => { bus => 2, addr => 15 },
        hostpci7 => { bus => 2, addr => 16 },
        hostpci8 => { bus => 2, addr => 17 },
        hostpci9 => { bus => 2, addr => 18 },
        hostpci10 => { bus => 2, addr => 19 },
        hostpci11 => { bus => 2, addr => 20 },
        hostpci12 => { bus => 2, addr => 21 },
        hostpci13 => { bus => 2, addr => 22 },
        hostpci14 => { bus => 2, addr => 23 },
        hostpci15 => { bus => 2, addr => 24 },
        virtioscsi0 => { bus => 3, addr => 1 },
        virtioscsi1 => { bus => 3, addr => 2 },
        virtioscsi2 => { bus => 3, addr => 3 },
        virtioscsi3 => { bus => 3, addr => 4 },
        virtioscsi4 => { bus => 3, addr => 5 },
        virtioscsi5 => { bus => 3, addr => 6 },
        virtioscsi6 => { bus => 3, addr => 7 },
        virtioscsi7 => { bus => 3, addr => 8 },
        virtioscsi8 => { bus => 3, addr => 9 },
        virtioscsi9 => { bus => 3, addr => 10 },
        virtioscsi10 => { bus => 3, addr => 11 },
        virtioscsi11 => { bus => 3, addr => 12 },
        virtioscsi12 => { bus => 3, addr => 13 },
        virtioscsi13 => { bus => 3, addr => 14 },
        virtioscsi14 => { bus => 3, addr => 15 },
        virtioscsi15 => { bus => 3, addr => 16 },
        virtioscsi16 => { bus => 3, addr => 17 },
        virtioscsi17 => { bus => 3, addr => 18 },
        virtioscsi18 => { bus => 3, addr => 19 },
        virtioscsi19 => { bus => 3, addr => 20 },
        virtioscsi20 => { bus => 3, addr => 21 },
        virtioscsi21 => { bus => 3, addr => 22 },
        virtioscsi22 => { bus => 3, addr => 23 },
        virtioscsi23 => { bus => 3, addr => 24 },
        virtioscsi24 => { bus => 3, addr => 25 },
        virtioscsi25 => { bus => 3, addr => 26 },
        virtioscsi26 => { bus => 3, addr => 27 },
        virtioscsi27 => { bus => 3, addr => 28 },
        virtioscsi28 => { bus => 3, addr => 29 },
        virtioscsi29 => { bus => 3, addr => 30 },
        virtioscsi30 => { bus => 3, addr => 31 },
        scsihw2 => { bus => 4, addr => 1 },
        scsihw3 => { bus => 4, addr => 2 },
        scsihw4 => { bus => 4, addr => 3 },
    };

    return $pci_addr_map;
}
226
# Looks up $id in the address map $map.
#
# Returns a fresh hash reference { bus => ..., addr => "0x<hex>" } when the
# entry exists and defines both 'bus' and 'addr'; returns nothing otherwise.
my $get_addr_mapping_from_id = sub {
    my ($map, $id) = @_;

    my $entry = $map->{$id};
    if (defined($entry) && defined($entry->{bus}) && defined($entry->{addr})) {
        return {
            bus => $entry->{bus},
            addr => sprintf("0x%x", $entry->{addr}),
        };
    }

    return;
};
235
# Returns the ",bus=<busname>.<n>,addr=<hex>" fragment for device $id, or an
# empty string when $id has no entry in the PCI address map.
#
# $bridges - optional hash ref; the bus number used by $id is recorded in it
# $arch    - optional guest architecture
# $machine - optional machine type
#
# Dies if an 'ide*' id is requested on an aarch64 'virt*' machine.
sub print_pci_addr {
    my ($id, $bridges, $arch, $machine) = @_;

    # using same bus slots on all HW, so we need to check special cases here:
    my $busname = 'pci';
    # $arch/$machine may be omitted by callers; treat undef as "not aarch64/virt"
    # instead of triggering 'uninitialized value' warnings.
    if (defined($arch) && $arch eq 'aarch64' && defined($machine) && $machine =~ /^virt/) {
        die "aarch64/virt cannot use IDE devices\n" if $id =~ /^ide/;
        $busname = 'pcie';
    }

    my $map = get_pci_addr_map();
    my $d = $get_addr_mapping_from_id->($map, $id);
    return '' if !$d;

    $bridges->{$d->{bus}} = 1 if $bridges;

    return ",bus=$busname.$d->{bus},addr=$d->{addr}";
}
256
d7d698f6
TL
# Lazily built cache for get_pcie_addr_map().
my $pcie_addr_map;

# Returns the (cached) hash reference mapping a device id to its PCIe
# placement { bus => <bus id>, addr => <slot> }.
#
# Every hostpciN has two entries: 'hostpciN' sits at addr 0 behind root port
# 'ich9-pcie-port-<N+1>', while 'hostpciNbus0' sits directly on 'pcie.0'.
sub get_pcie_addr_map {
    return $pcie_addr_map if defined($pcie_addr_map);

    my $map = {
        vga => { bus => 'pcie.0', addr => 1 },
        ivshmem => { bus => 'pcie.0', addr => 20 },
    };

    # win7 is picky about pcie assignments
    # pcie.0 slot of hostpciNbus0, indexed by N
    my @bus0_slots = (16, 17, 18, 19, 9, 10, 11, 12, 13, 14, 15, 21, 22, 23, 24, 25);

    for my $n (0 .. $#bus0_slots) {
        my $port = $n + 1;
        $map->{"hostpci${n}"} = { bus => "ich9-pcie-port-${port}", addr => 0 };
        $map->{"hostpci${n}bus0"} = { bus => 'pcie.0', addr => $bus0_slots[$n] };
    }

    $pcie_addr_map = $map;
    return $pcie_addr_map;
}
299
# Returns the ",bus=<bus>,addr=<hex>" fragment for device $id, or an empty
# string when $id has no entry in the PCIe address map.
sub print_pcie_addr {
    my ($id) = @_;

    my $mapping = $get_addr_mapping_from_id->(get_pcie_addr_map(), $id);

    return $mapping ? ",bus=$mapping->{bus},addr=$mapping->{addr}" : '';
}
b71351a7 312
c4e16381
AL
# Generates the device strings for additional pcie root ports. The first 4 pcie
# root ports are defined in the pve-q35*.cfg files.
#
# $i is the zero-based port index; only 4 .. 15 are known here. Any other
# value yields an empty string.
sub print_pcie_root_port {
    my ($i) = @_;

    # slot.function on pcie.0 for each additional root port
    my %addr_of_port = (
        4 => "10.0",
        5 => "10.1",
        6 => "10.2",
        7 => "10.3",
        8 => "10.4",
        9 => "10.5",
        10 => "10.6",
        11 => "10.7",
        12 => "11.0",
        13 => "11.1",
        14 => "11.2",
        15 => "11.3",
    );

    my $addr = $addr_of_port{$i};
    return '' if !defined($addr);

    # ids, port and chassis numbers are one-based
    my $id = $i + 1;

    return join(',',
        "pcie-root-port",
        "id=ich9-pcie-port-${id}",
        "addr=${addr}",
        "x-speed=16",
        "x-width=32",
        "multifunction=on",
        "bus=pcie.0",
        "port=${id}",
        "chassis=${id}",
    );
}
344
74c17b7a
SR
# Parses a 'hostpciN' property string.
#
# Returns nothing for an empty/false $value. Otherwise returns the parsed
# hash reference in which the 'host' key has been replaced by 'pciid': an
# array reference collecting what PVE::SysFSTools::lspci() returned for each
# ';'-separated host id. Dies if an id yields no device.
sub parse_hostpci {
    my ($value) = @_;

    return if !$value;

    my $res = PVE::JSONSchema::parse_property_string($hostpci_fmt, $value);

    my $hostlist = delete $res->{host};
    for my $hostid (split(/;/, $hostlist)) {
        my $found = PVE::SysFSTools::lspci($hostid);
        die "no PCI device found for '$hostid'\n" if !scalar(@$found);
        push @{$res->{pciid}}, @$found;
    }

    return $res;
}
361
# Appends the '-device vfio-pci,...' arguments for all configured hostpciN
# entries to @$devices.
#
# $vmid         - VM id, used to derive mediated device UUIDs
# $conf         - VM config hash; reads hostpciN, 'vga' and 'bios'
# $devices      - array ref of command line arguments, extended in place
# $vga          - hash ref; its 'type' is set to 'none' when x-vga is used and
#                 the config has no 'vga' setting
# $winversion   - Windows version number; 7 places PCIe devices directly on
#                 pcie.0 instead of behind a root port
# $q35          - true if the q35 machine model is in use
# $bridges, $arch, $machine_type - passed through to print_pci_addr()
# $bootorder    - hash ref mapping device ids to boot indexes
#
# Returns the list ($kvm_off, $gpu_passthrough, $legacy_igd).
# Dies on invalid option combinations.
sub print_hostpci_devices {
    my ($vmid, $conf, $devices, $vga, $winversion, $q35, $bridges, $arch, $machine_type, $bootorder) = @_;

    my $kvm_off = 0;
    my $gpu_passthrough = 0;
    my $legacy_igd = 0;

    for (my $i = 0; $i < $MAX_HOSTPCI_DEVICES; $i++) {
        my $id = "hostpci$i";
        my $d = parse_hostpci($conf->{$id});
        next if !$d;

        my $pciaddr;
        if ($d->{pcie}) {
            die "q35 machine model is not enabled\n" if !$q35;
            # win7 wants to have the pcie devices directly on the pcie bus
            # instead of in the root port
            if (defined($winversion) && $winversion == 7) {
                $pciaddr = print_pcie_addr("${id}bus0");
            } else {
                # add more root ports if needed, 4 are present by default
                # by pve-q35 cfgs, rest added here on demand.
                if ($i > 3) {
                    push @$devices, '-device', print_pcie_root_port($i);
                }
                $pciaddr = print_pcie_addr($id);
            }
        } else {
            my $pci_name = $d->{'legacy-igd'} ? 'legacy-igd' : $id;
            $pciaddr = print_pci_addr($pci_name, $bridges, $arch, $machine_type);
        }

        my $pcidevices = $d->{pciid};
        my $multifunction = @$pcidevices > 1;

        if ($d->{'legacy-igd'}) {
            die "only one device can be assigned in legacy-igd mode\n"
                if $legacy_igd;
            $legacy_igd = 1;

            die "legacy IGD assignment requires VGA mode to be 'none'\n"
                if !defined($conf->{'vga'}) || $conf->{'vga'} ne 'none';
            die "legacy IGD assignment requires rombar to be enabled\n"
                if defined($d->{rombar}) && !$d->{rombar};
            die "legacy IGD assignment is not compatible with x-vga\n"
                if $d->{'x-vga'};
            die "legacy IGD assignment is not compatible with mdev\n"
                if $d->{mdev};
            die "legacy IGD assignment is not compatible with q35\n"
                if $q35;
            die "legacy IGD assignment is not compatible with multifunction devices\n"
                if $multifunction;
            die "legacy IGD assignment only works for devices on host bus 00:02.0\n"
                if $pcidevices->[0]->{id} !~ m/02\.0$/;
        }

        my $xvga = '';
        if ($d->{'x-vga'}) {
            # x-vga=on is only emitted when the BIOS is not OVMF
            $xvga = ',x-vga=on' if !($conf->{bios} && $conf->{bios} eq 'ovmf');
            $kvm_off = 1;
            $vga->{type} = 'none' if !defined($conf->{vga});
            $gpu_passthrough = 1;
        }

        # a mediated device is only used for single-function assignments
        my $sysfspath;
        if ($d->{mdev} && scalar(@$pcidevices) == 1) {
            my $pci_id = $pcidevices->[0]->{id};
            my $uuid = PVE::SysFSTools::generate_mdev_uuid($vmid, $i);
            $sysfspath = "/sys/bus/pci/devices/$pci_id/$uuid";
        } elsif ($d->{mdev}) {
            warn "ignoring mediated device '$id' with multifunction device\n";
        }

        my $j = 0;
        foreach my $pcidevice (@$pcidevices) {
            my $devicestr = "vfio-pci";

            if ($sysfspath) {
                $devicestr .= ",sysfsdev=$sysfspath";
            } else {
                $devicestr .= ",host=$pcidevice->{id}";
            }

            # for multifunction devices both the id and the address get the
            # function number appended
            my $mf_addr = $multifunction ? ".$j" : '';
            $devicestr .= ",id=${id}${mf_addr}${pciaddr}${mf_addr}";

            # the remaining options are only set on the first function
            if ($j == 0) {
                $devicestr .= ',rombar=0' if defined($d->{rombar}) && !$d->{rombar};
                $devicestr .= "$xvga";
                $devicestr .= ",multifunction=on" if $multifunction;
                $devicestr .= ",romfile=/usr/share/kvm/$d->{romfile}" if $d->{romfile};
                $devicestr .= ",bootindex=$bootorder->{$id}" if $bootorder->{$id};
            }

            push @$devices, '-device', $devicestr;
            $j++;
        }
    }

    return ($kvm_off, $gpu_passthrough, $legacy_igd);
}
463
acd4b777
DC
# Prepares host PCI device $pciid for use by VM $vmid.
#
# With $mdev set, a mediated device of that type is created using the UUID
# generated from $vmid and $confslot. Otherwise the device's group is bound
# to VFIO and, if the device info reports 'has_fl_reset', the device is reset.
#
# Dies if IOMMU support is missing, no device info is available, or binding
# or resetting fails. Returns nothing.
sub prepare_pci_device {
    my ($vmid, $pciid, $confslot, $mdev) = @_;

    my $info = PVE::SysFSTools::pci_device_info("$pciid");
    die "IOMMU not present\n" if !PVE::SysFSTools::check_iommu_support();
    die "no pci device info for device '$pciid'\n" if !$info;

    if (!$mdev) {
        PVE::SysFSTools::pci_dev_group_bind_to_vfio($pciid)
            or die "can't unbind/bind PCI group to VFIO '$pciid'\n";

        if ($info->{has_fl_reset}) {
            PVE::SysFSTools::pci_dev_reset($info)
                or die "can't reset PCI device '$pciid'\n";
        }

        return;
    }

    my $uuid = PVE::SysFSTools::generate_mdev_uuid($vmid, $confslot);
    PVE::SysFSTools::pci_create_mdev_device($pciid, $uuid, $mdev);

    return;
}
483
b71351a7 4841;