use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file);
use PVE::CGroup;
+use PVE::CpuSet;
use PVE::DataCenterConfig;
use PVE::Exception qw(raise raise_param_exc);
use PVE::Format qw(render_duration render_bytes);
use PVE::JSONSchema qw(get_standard_option parse_property_string);
use PVE::ProcFSTools;
use PVE::PBSClient;
+use PVE::RESTEnvironment qw(log_warn);
use PVE::RPCEnvironment;
use PVE::Storage;
use PVE::SysFSTools;
use PVE::QMPClient;
use PVE::QemuConfig;
-use PVE::QemuServer::Helpers qw(min_version config_aware_timeout);
+use PVE::QemuServer::Helpers qw(min_version config_aware_timeout windows_version);
use PVE::QemuServer::Cloudinit;
use PVE::QemuServer::CGroup;
use PVE::QemuServer::CPUConfig qw(print_cpu_device get_cpu_options);
optional => 1,
});
-
-sub map_storage {
- my ($map, $source) = @_;
-
- return $source if !defined($map);
-
- return $map->{entries}->{$source}
- if $map->{entries} && defined($map->{entries}->{$source});
-
- return $map->{default} if $map->{default};
-
- # identity (fallback)
- return $source;
-}
-
-PVE::JSONSchema::register_standard_option('pve-targetstorage', {
- description => "Mapping from source to target storages. Providing only a single storage ID maps all source storages to that storage. Providing the special value '1' will map each source storage to itself.",
- type => 'string',
- format => 'storagepair-list',
- optional => 1,
-});
-
#no warnings 'redefine';
my $nodename_cache;
default => 'std',
optional => 1,
default_key => 1,
- enum => [qw(cirrus qxl qxl2 qxl3 qxl4 none serial0 serial1 serial2 serial3 std virtio vmware)],
+ enum => [qw(cirrus qxl qxl2 qxl3 qxl4 none serial0 serial1 serial2 serial3 std virtio virtio-gl vmware)],
},
memory => {
description => "Sets the VGA memory (in MiB). Has no effect with serial display.",
optional => 1,
type => 'string', format => 'pve-hotplug-features',
description => "Selectively enable hotplug features. This is a comma separated list of"
- ." hotplug features: 'network', 'disk', 'cpu', 'memory' and 'usb'. Use '0' to disable"
- ." hotplug completely. Using '1' as value is an alias for the default `network,disk,usb`.",
+ ." hotplug features: 'network', 'disk', 'cpu', 'memory', 'usb' and 'cloudinit'. Use '0' to disable"
+ ." hotplug completely. Using '1' as value is an alias for the default `network,disk,usb`."
+ ." USB hotplugging is possible for guests with machine version >= 7.1 and ostype l26 or"
+ ." windows > 7.",
default => 'network,disk,usb',
},
reboot => {
verbose_description => "CPU weight for a VM. Argument is used in the kernel fair scheduler."
." The larger the number is, the more CPU time this VM gets. Number is relative to"
." weights of all the other running VMs.",
- minimum => 2,
+ minimum => 1,
maximum => 262144,
default => 'cgroup v1: 1024, cgroup v2: 100',
},
keyboard => {
optional => 1,
type => 'string',
- description => "Keyboard layout for VNC server. The default is read from the"
- ."'/etc/pve/datacenter.cfg' configuration file. It should not be necessary to set it.",
+ description => "Keyboard layout for VNC server. This option is generally not required and"
+ ." is often better handled from within the guest OS.",
enum => PVE::Tools::kvmkeymaplist(),
default => undef,
},
description => "Some (read-only) meta-information about this guest.",
optional => 1,
},
+ affinity => {
+ type => 'string', format => 'pve-cpuset',
+ description => "List of host cores used to execute guest processes.",
+ optional => 1,
+ },
};
my $cicustom_fmt = {
searchdomain => {
optional => 1,
type => 'string',
- description => "cloud-init: Sets DNS search domains for a container. Create will'
+ description => 'cloud-init: Sets DNS search domains for a container. Create will'
.' automatically use the setting from the host if neither searchdomain nor nameserver'
- .' are set.",
+ .' are set.',
},
nameserver => {
optional => 1,
type => 'string', format => 'address-list',
- description => "cloud-init: Sets DNS server IP address for a container. Create will'
+ description => 'cloud-init: Sets DNS server IP address for a container. Create will'
.' automatically use the setting from the host if neither searchdomain nor nameserver'
- .' are set.",
+ .' are set.',
},
sshkeys => {
optional => 1,
PVE::JSONSchema::register_standard_option("pve-qm-$k", $v);
}
-my $MAX_USB_DEVICES = 5;
+my $MAX_USB_DEVICES = 14;
my $MAX_NETS = 32;
my $MAX_SERIAL_PORTS = 4;
my $MAX_PARALLEL_PORTS = 3;
default_key => 1,
},
(map { $_ => { keyAlias => 'model', alias => 'macaddr' }} @$nic_model_list),
- bridge => {
- type => 'string',
+ bridge => get_standard_option('pve-bridge-id', {
description => $net_fmt_bridge_descr,
- format_description => 'bridge',
- pattern => '[-_.\w\d]+',
optional => 1,
- },
+ }),
queues => {
type => 'integer',
minimum => 0, maximum => 16,
$confdesc->{$key} = $confdesc_cloudinit->{$key};
}
+PVE::JSONSchema::register_format('pve-cpuset', \&pve_verify_cpuset);
+# Verifier for the 'pve-cpuset' format.
+#
+# Parses $set_text with PVE::CpuSet::parse_cpuset() and returns the short string form of the
+# resulting PVE::CpuSet. If parsing fails, returns nothing when $noerr is set and dies otherwise.
+sub pve_verify_cpuset {
+    my ($set_text, $noerr) = @_;
+
+    # only the member list is needed here, the first returned value is not used
+    my (undef, $members) = eval { PVE::CpuSet::parse_cpuset($set_text) };
+    my $parse_err = $@;
+
+    if (!$parse_err) {
+        my $cpuset = PVE::CpuSet->new($members);
+        return $cpuset->short_string();
+    }
+
+    die "unable to parse cpuset option\n" if !$noerr;
+    return;
+}
+
PVE::JSONSchema::register_format('pve-volume-id-or-qm-path', \&verify_volume_id_or_qm_path);
sub verify_volume_id_or_qm_path {
my ($volid, $noerr) = @_;
- if ($volid eq 'none' || $volid eq 'cdrom' || $volid =~ m|^/|) {
- return $volid;
- }
+ return $volid if $volid eq 'none' || $volid eq 'cdrom';
+
+ return verify_volume_id_or_absolute_path($volid, $noerr);
+}
+
+PVE::JSONSchema::register_format('pve-volume-id-or-absolute-path', \&verify_volume_id_or_absolute_path);
+sub verify_volume_id_or_absolute_path {
+ my ($volid, $noerr) = @_;
+
+ return $volid if $volid =~ m|^/|;
- # if its neither 'none' nor 'cdrom' nor a path, check if its a volume-id
$volid = eval { PVE::JSONSchema::check_format('pve-volume-id', $volid, '') };
if ($@) {
return if $noerr;
usb3 => {
optional => 1,
type => 'boolean',
- description => "Specifies whether if given host option is a USB3 device or port.",
+ description => "Specifies whether if given host option is a USB3 device or port."
+ ." For modern guests (machine version >= 7.1 and ostype l26 and windows > 7), this flag"
+ ." is irrelevant (all devices are plugged into a xhci controller).",
default => 0,
},
};
my $usbdesc = {
optional => 1,
type => 'string', format => $usb_fmt,
- description => "Configure an USB device (n is 0 to 4).",
+ description => "Configure an USB device (n is 0 to 4, for machine version >= 7.1 and ostype"
+ ." l26 or windows > 7, n can be up to 14).",
};
PVE::JSONSchema::register_standard_option("pve-qm-usb", $usbdesc);
$data = $confdesc->{hotplug}->{default} if $data eq '1';
foreach my $feature (PVE::Tools::split_list($data)) {
- if ($feature =~ m/^(network|disk|cpu|memory|usb)$/) {
+ if ($feature =~ m/^(network|disk|cpu|memory|usb|cloudinit)$/) {
$res->{$1} = 1;
} else {
die "invalid hotplug feature '$feature'\n";
# we use uhci for old VMs because tablet driver was buggy in older qemu
my $usbbus;
- if (PVE::QemuServer::Machine::machine_type_is_q35($conf) || $arch eq 'aarch64') {
+ if ($q35 || $arch eq 'aarch64') {
$usbbus = 'ehci';
} else {
$usbbus = 'uhci';
# sometimes, just plain disable...
my $lvm_no_io_uring = $scfg && $scfg->{type} eq 'lvm';
+ # io_uring causes problems when used with CIFS since kernel 5.15
+ # Some discussion: https://www.spinics.net/lists/linux-cifs/msg26734.html
+ my $cifs_no_io_uring = $scfg && $scfg->{type} eq 'cifs';
+
if (!$drive->{aio}) {
- if ($io_uring && !$rbd_no_io_uring && !$lvm_no_io_uring) {
+ if ($io_uring && !$rbd_no_io_uring && !$lvm_no_io_uring && !$cifs_no_io_uring) {
# io_uring supports all cache modes
$opts .= ",aio=io_uring";
} else {
my ($pbs_conf, $pbs_name) = @_;
my $blockdev = "driver=pbs,node-name=$pbs_name,read-only=on";
$blockdev .= ",repository=$pbs_conf->{repository}";
+ $blockdev .= ",namespace=$pbs_conf->{namespace}" if $pbs_conf->{namespace};
$blockdev .= ",snapshot=$pbs_conf->{snapshot}";
$blockdev .= ",archive=$pbs_conf->{archive}";
$blockdev .= ",keyfile=$pbs_conf->{keyfile}" if $pbs_conf->{keyfile};
}
sub print_netdevice_full {
- my ($vmid, $conf, $net, $netid, $bridges, $use_old_bios_files, $arch, $machine_type) = @_;
+ my ($vmid, $conf, $net, $netid, $bridges, $use_old_bios_files, $arch, $machine_type, $machine_version) = @_;
my $device = $net->{model};
if ($net->{model} eq 'virtio') {
# and out of each queue plus one config interrupt and control vector queue
my $vectors = $net->{queues} * 2 + 2;
$tmpstr .= ",vectors=$vectors,mq=on";
+ if (min_version($machine_version, 7, 1)) {
+ $tmpstr .= ",packed=on";
+ }
+ }
+
+ if (min_version($machine_version, 7, 1) && $net->{model} eq 'virtio'){
+ $tmpstr .= ",rx_queue_size=1024,tx_queue_size=1024";
}
+
$tmpstr .= ",bootindex=$net->{bootindex}" if $net->{bootindex} ;
if (my $mtu = $net->{mtu}) {
'std' => 'VGA',
'vmware' => 'vmware-svga',
'virtio' => 'virtio-vga',
+ 'virtio-gl' => 'virtio-vga-gl',
};
sub print_vga_device {
my $memory = "";
if ($vgamem_mb) {
- if ($vga->{type} eq 'virtio') {
+ if ($vga->{type} =~ /^virtio/) {
my $bytes = PVE::Tools::convert_size($vgamem_mb, "mb" => "b");
$memory = ",max_hostmem=$bytes";
} elsif ($qxlnum) {
$pciaddr = print_pci_addr($vgaid, $bridges, $arch, $machine);
}
+ if ($vga->{type} eq 'virtio-gl') {
+ my $base = '/usr/lib/x86_64-linux-gnu/lib';
+ die "missing libraries for '$vga->{type}' detected! Please install 'libgl1' and 'libegl1'\n"
+ if !-e "${base}EGL.so.1" || !-e "${base}GL.so.1";
+
+ die "no DRM render node detected (/dev/dri/renderD*), no GPU? - needed for '$vga->{type}' display\n"
+ if !PVE::Tools::dir_glob_regex('/dev/dri/', "renderD.*");
+ }
+
return "$type,id=${vgaid}${memory}${max_outputs}${pciaddr}${edidoff}";
}
# netX: e1000=XX:XX:XX:XX:XX:XX,bridge=vmbr0,rate=<mbps>
sub parse_net {
- my ($data) = @_;
+ my ($data, $disable_mac_autogen) = @_;
my $res = eval { parse_property_string($net_fmt, $data) };
if ($@) {
warn $@;
return;
}
- if (!defined($res->{macaddr})) {
+ if (!defined($res->{macaddr}) && !$disable_mac_autogen) {
my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');
$res->{macaddr} = PVE::Tools::random_ether_addr($dc->{mac_prefix});
}
if (drive_is_cloudinit($drive)) {
eval { PVE::Storage::vdisk_free($storecfg, $drive->{file}) };
warn $@ if $@;
+ delete $conf->{cloudinit};
} elsif (!drive_is_cdrom($drive)) {
my $volid = $drive->{file};
if (vm_is_volid_owner($storecfg, $vmid, $volid)) {
);
}
+# Returns extra QEMU command line arguments (as array reference) needed to keep the virtual
+# hardware stable for VMs that were created before QEMU 6.1 but run without a pinned machine
+# version. Returns nothing if no fixup applies.
+#
+# $conf         - VM configuration hash
+# $forcemachine - forced machine type (e.g. from snapshot rollback/migration), may be undef
+# $kvmver       - version of the QEMU binary in use
+sub qemu_created_version_fixups {
+    my ($conf, $forcemachine, $kvmver) = @_;
+
+    my $meta = parse_meta_info($conf->{meta}) // {};
+    my $forced_vers = PVE::QemuServer::Machine::extract_version($forcemachine);
+
+    # Some handling is needed for VMs that always use the latest machine version but had a
+    # machine version transition happen that affected HW such that, e.g., an OS config change
+    # would be required (we do not want to pin machine version for non-windows OS type).
+    # Each guard below rules out one case in which no such handling is required.
+
+    # versioned machine type configured
+    my $machine = $conf->{machine};
+    return if defined($machine) && $machine !~ m/^(?:pc|q35|virt)$/;
+
+    # created with 6.1 or newer
+    my $creation_qemu = $meta->{'creation-qemu'};
+    return if defined($creation_qemu) && min_version($creation_qemu, 6, 1);
+
+    # snapshot-rollback/migrations forcing a version older than 6.1
+    return if $forced_vers && !min_version($forced_vers, 6, 1);
+
+    # only need to apply the change since 6.1
+    return if !min_version($kvmver, 6, 1);
+
+    my $is_q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
+    my $is_l26 = $conf->{ostype} && $conf->{ostype} eq 'l26';
+    return if !($is_q35 && $is_l26);
+
+    # this changed to default-on in Q 6.1 for q35 machines, it will mess with PCI slot view
+    # and thus with the predictable interface naming of systemd
+    return ['-global', 'ICH9-LPC.acpi-pci-hotplug-with-bridge-support=off'];
+}
+
PVE::JSONSchema::register_format('pve-qm-usb-device', \&verify_usb_device);
sub verify_usb_device {
my ($value, $noerr) = @_;
# add JSON properties for create and set function
sub json_config_properties {
- my $prop = shift;
+ my ($prop, $with_disk_alloc) = @_;
my $skip_json_config_opts = {
parent => 1,
foreach my $opt (keys %$confdesc) {
next if $skip_json_config_opts->{$opt};
- $prop->{$opt} = $confdesc->{$opt};
+
+ if ($with_disk_alloc && is_valid_drivename($opt)) {
+ $prop->{$opt} = $PVE::QemuServer::Drive::drivedesc_hash_with_alloc->{$opt};
+ } else {
+ $prop->{$opt} = $confdesc->{$opt};
+ }
}
return $prop;
}
+# Properties that we can read from an OVF file
+#
+# Returns a hash reference with one optional disk image property per valid drive name plus the
+# optional 'cores', 'memory' and 'name' properties.
+sub json_ovf_properties {
+    my %ovf_props = map {
+        my $device = $_;
+        (
+            $device => {
+                type => 'string',
+                format => 'pve-volume-id-or-absolute-path',
+                description => "Disk image that gets imported to $device",
+                optional => 1,
+            },
+        );
+    } PVE::QemuServer::Drive::valid_drive_names();
+
+    $ovf_props{cores} = {
+        type => 'integer',
+        description => "The number of CPU cores.",
+        optional => 1,
+    };
+    $ovf_props{memory} = {
+        type => 'integer',
+        description => "Amount of RAM for the VM in MB.",
+        optional => 1,
+    };
+    $ovf_props{name} = {
+        type => 'string',
+        description => "Name of the VM.",
+        optional => 1,
+    };
+
+    return \%ovf_props;
+}
+
# return copy of $confdesc_cloudinit to generate documentation
sub cloudinit_config_properties {
}
sub parse_vm_config {
- my ($filename, $raw) = @_;
+ my ($filename, $raw, $strict) = @_;
return if !defined($raw);
digest => Digest::SHA::sha1_hex($raw),
snapshots => {},
pending => {},
+ cloudinit => {},
+ };
+
+ my $handle_error = sub {
+ my ($msg) = @_;
+
+ if ($strict) {
+ die $msg;
+ } else {
+ warn $msg;
+ }
};
$filename =~ m|/qemu-server/(\d+)\.conf$|
$descr = undef;
$conf = $res->{$section} = {};
next;
+ } elsif ($line =~ m/^\[special:cloudinit\]\s*$/i) {
+ $section = 'cloudinit';
+ $descr = undef;
+ $conf = $res->{$section} = {};
+ next;
} elsif ($line =~ m/^\[([a-z][a-z0-9_\-]+)\]\s*$/i) {
$section = $1;
next;
}
- if ($line =~ m/^\#(.*)\s*$/) {
+ if ($line =~ m/^\#(.*)$/) {
$descr = '' if !defined($descr);
$descr .= PVE::Tools::decode_text($1) . "\n";
next;
if ($section eq 'pending') {
$conf->{delete} = $value; # we parse this later
} else {
- warn "vm $vmid - propertry 'delete' is only allowed in [PENDING]\n";
+ $handle_error->("vm $vmid - property 'delete' is only allowed in [PENDING]\n");
}
} elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(.+?)\s*$/) {
my $key = $1;
my $value = $2;
eval { $value = check_type($key, $value); };
if ($@) {
- warn "vm $vmid - unable to parse value of '$key' - $@";
+ $handle_error->("vm $vmid - unable to parse value of '$key' - $@");
} else {
$key = 'ide2' if $key eq 'cdrom';
my $fmt = $confdesc->{$key}->{format};
$v->{file} = $volid;
$value = print_drive($v);
} else {
- warn "vm $vmid - unable to parse value of '$key'\n";
+ $handle_error->("vm $vmid - unable to parse value of '$key'\n");
next;
}
}
$conf->{$key} = $value;
}
} else {
- warn "vm $vmid - unable to parse config: $line\n";
+ $handle_error->("vm $vmid - unable to parse config: $line\n");
}
}
foreach my $key (keys %$cref) {
next if $key eq 'digest' || $key eq 'description' || $key eq 'snapshots' ||
- $key eq 'snapstate' || $key eq 'pending';
+ $key eq 'snapstate' || $key eq 'pending' || $key eq 'cloudinit';
my $value = $cref->{$key};
if ($key eq 'delete') {
die "propertry 'delete' is only allowed in [PENDING]\n"
&$cleanup_config($conf->{pending}, 1);
+ &$cleanup_config($conf->{cloudinit});
+
foreach my $snapname (keys %{$conf->{snapshots}}) {
die "internal error: snapshot name '$snapname' is forbidden" if lc($snapname) eq 'pending';
&$cleanup_config($conf->{snapshots}->{$snapname}, undef, $snapname);
}
foreach my $key (sort keys %$conf) {
- next if $key =~ /^(digest|description|pending|snapshots)$/;
+ next if $key =~ /^(digest|description|pending|cloudinit|snapshots)$/;
$raw .= "$key: $conf->{$key}\n";
}
return $raw;
$raw .= &$generate_raw_config($conf->{pending}, 1);
}
+ if (scalar(keys %{$conf->{cloudinit}})){
+ $raw .= "\n[special:cloudinit]\n";
+ $raw .= &$generate_raw_config($conf->{cloudinit});
+ }
+
foreach my $snapname (sort keys %{$conf->{snapshots}}) {
$raw .= "\n[$snapname]\n";
$raw .= &$generate_raw_config($conf->{snapshots}->{$snapname});
return \@flags;
}
-my sub get_cpuunits {
- my ($conf) = @_;
- return $conf->{cpuunits} // (PVE::CGroup::cgroup_mode() == 2 ? 100 : 1024);
+# Since commit 277d33454f77ec1d1e0bc04e37621e4dd2424b67 in pve-qemu, smm is not off by default
+# anymore. But smm=off seems to be required when using SeaBIOS and serial display.
+#
+# Yields a true value if the VM uses SeaBIOS (explicitly, or because no bios is configured) and
+# the VGA type is one of the serialN displays or 'none'.
+my sub should_disable_smm {
+    my ($conf, $vga) = @_;
+
+    my $bios = $conf->{bios};
+    my $uses_seabios = !defined($bios) || $bios eq 'seabios';
+
+    return $uses_seabios && $vga->{type} && $vga->{type} =~ m/^(serial\d+|none)$/;
}
+
sub config_to_command {
my ($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu,
$pbs_backing) = @_;
my $use_old_bios_files = undef;
($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type);
- my $cpuunits = get_cpuunits($conf);
+ if ($conf->{affinity}) {
+ push @$cmd, "/usr/bin/taskset";
+ push @$cmd, "--cpu-list";
+ push @$cmd, "--all-tasks";
+ push @$cmd, $conf->{affinity};
+ }
push @$cmd, $kvm_binary;
my $vmname = $conf->{name} || "vm$vmid";
- push @$cmd, '-name', $vmname;
+ push @$cmd, '-name', "$vmname,debug-threads=on";
push @$cmd, '-no-shutdown';
$read_only_str = ',readonly=on' if drive_is_read_only($conf, $d);
} else {
- warn "no efidisk configured! Using temporary efivars disk.\n";
+ log_warn("no efidisk configured! Using temporary efivars disk.");
$path = "/tmp/$vmid-ovmf.fd";
PVE::Tools::file_copy($ovmf_vars, $path, -s $ovmf_vars);
$format = 'raw';
}
}
+ if (defined(my $fixups = qemu_created_version_fixups($conf, $forcemachine, $kvmver))) {
+ push @$cmd, $fixups->@*;
+ }
+
if ($conf->{vmgenid}) {
push @$devices, '-device', 'vmgenid,guid='.$conf->{vmgenid};
}
# add usb controllers
my @usbcontrollers = PVE::QemuServer::USB::get_usb_controllers(
- $conf, $bridges, $arch, $machine_type, $usbdesc->{format}, $MAX_USB_DEVICES);
+ $conf, $bridges, $arch, $machine_type, $usbdesc->{format}, $MAX_USB_DEVICES, $machine_version);
push @$devices, @usbcontrollers if @usbcontrollers;
my $vga = parse_vga($conf->{vga});
$usb_dev_features->{spice_usb3} = 1 if min_version($machine_version, 4, 0);
my @usbdevices = PVE::QemuServer::USB::get_usb_devices(
- $conf, $usbdesc->{format}, $MAX_USB_DEVICES, $usb_dev_features, $bootorder);
+ $conf, $usbdesc->{format}, $MAX_USB_DEVICES, $usb_dev_features, $bootorder, $machine_version);
push @$devices, @usbdevices if @usbdevices;
# serial devices
if ($vga->{type} && $vga->{type} !~ m/^serial\d+$/ && $vga->{type} ne 'none'){
push @$devices, '-device', print_vga_device(
$conf, $vga, $arch, $machine_version, $machine_type, undef, $qxlnum, $bridges);
+
+ push @$cmd, '-display', 'egl-headless,gl=core' if $vga->{type} eq 'virtio-gl'; # VIRGL
+
my $socket = PVE::QemuServer::Helpers::vnc_socket($vmid);
push @$cmd, '-vnc', "unix:$socket,password=on";
} else {
my $spice_port;
- if ($qxlnum) {
+ if ($qxlnum || $vga->{type} =~ /^virtio/) {
if ($qxlnum > 1) {
if ($winversion){
for (my $i = 1; $i < $qxlnum; $i++){
# enable balloon by default, unless explicitly disabled
if (!defined($conf->{balloon}) || $conf->{balloon}) {
my $pciaddr = print_pci_addr("balloon0", $bridges, $arch, $machine_type);
- push @$devices, '-device', "virtio-balloon-pci,id=balloon0$pciaddr";
+ my $ballooncmd = "virtio-balloon-pci,id=balloon0$pciaddr";
+ $ballooncmd .= ",free-page-reporting=on" if min_version($machine_version, 6, 2);
+ push @$devices, '-device', $ballooncmd;
}
if ($conf->{watchdog}) {
$iothread .= ",iothread=iothread-$controller_prefix$controller";
push @$cmd, '-object', "iothread,id=iothread-$controller_prefix$controller";
} elsif ($drive->{iothread}) {
- warn "iothread is only valid with virtio disk or virtio-scsi-single controller, ignoring\n";
+ log_warn(
+ "iothread is only valid with virtio disk or virtio-scsi-single controller, ignoring\n"
+ );
}
my $queues = '';
next if !$conf->{$netname};
my $d = parse_net($conf->{$netname});
next if !$d;
+ # save the MAC addr here (could be auto-gen. in some odd setups) for FDB registering later?
$use_virtio = 1 if $d->{model} eq 'virtio';
push @$devices, '-netdev', $netdevfull;
my $netdevicefull = print_netdevice_full(
- $vmid, $conf, $d, $netname, $bridges, $use_old_bios_files, $arch, $machine_type);
+ $vmid, $conf, $d, $netname, $bridges, $use_old_bios_files, $arch, $machine_type, $machine_version);
push @$devices, '-device', $netdevicefull;
}
push @$machineFlags, 'accel=tcg';
}
+ push @$machineFlags, 'smm=off' if should_disable_smm($conf, $vga);
+
my $machine_type_min = $machine_type;
if ($add_pve_version) {
$machine_type_min =~ s/\+pve\d+$//;
# qom-list path=/machine/peripheral
my $resperipheral = mon_cmd($vmid, 'qom-list', path => '/machine/peripheral');
foreach my $per (@$resperipheral) {
- if ($per->{name} =~ m/^usb\d+$/) {
+ if ($per->{name} =~ m/^usb(?:redirdev)?\d+$/) {
$devices->{$per->{name}} = 1;
}
}
qemu_deviceadd($vmid, print_tabletdevice_full($conf, $arch));
} elsif ($deviceid eq 'keyboard') {
qemu_deviceadd($vmid, print_keyboarddevice_full($conf, $arch));
+ } elsif ($deviceid =~ m/^usbredirdev(\d+)$/) {
+ my $id = $1;
+ qemu_spice_usbredir_chardev_add($vmid, "usbredirchardev$id");
+ qemu_deviceadd($vmid, PVE::QemuServer::USB::print_spice_usbdevice($id, "xhci", $id + 1));
} elsif ($deviceid =~ m/^usb(\d+)$/) {
- die "usb hotplug currently not reliable\n";
- # since we can't reliably hot unplug all added usb devices and usb
- # passthrough breaks live migration we disable usb hotplugging for now
- #qemu_deviceadd($vmid, PVE::QemuServer::USB::print_usbdevice_full($conf, $deviceid, $device));
+ qemu_deviceadd($vmid, PVE::QemuServer::USB::print_usbdevice_full($conf, $deviceid, $device, {}, $1 + 1));
} elsif ($deviceid =~ m/^(virtio)(\d+)$/) {
qemu_iothread_add($vmid, $deviceid, $device);
return if !qemu_netdevadd($vmid, $conf, $arch, $device, $deviceid);
my $machine_type = PVE::QemuServer::Machine::qemu_machine_pxe($vmid, $conf);
+ my $machine_version = PVE::QemuServer::Machine::extract_version($machine_type);
my $use_old_bios_files = undef;
($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type);
my $netdevicefull = print_netdevice_full(
- $vmid, $conf, $device, $deviceid, undef, $use_old_bios_files, $arch, $machine_type);
+ $vmid, $conf, $device, $deviceid, undef, $use_old_bios_files, $arch, $machine_type, $machine_version);
qemu_deviceadd($vmid, $netdevicefull);
eval {
qemu_deviceaddverify($vmid, $deviceid);
my $bootdisks = PVE::QemuServer::Drive::get_bootdisks($conf);
die "can't unplug bootdisk '$deviceid'\n" if grep {$_ eq $deviceid} @$bootdisks;
- if ($deviceid eq 'tablet' || $deviceid eq 'keyboard') {
+ if ($deviceid eq 'tablet' || $deviceid eq 'keyboard' || $deviceid eq 'xhci') {
qemu_devicedel($vmid, $deviceid);
+ } elsif ($deviceid =~ m/^usbredirdev\d+$/) {
+ qemu_devicedel($vmid, $deviceid);
+ qemu_devicedelverify($vmid, $deviceid);
} elsif ($deviceid =~ m/^usb\d+$/) {
- die "usb hotplug currently not reliable\n";
- # when unplugging usb devices this way, there may be remaining usb
- # controllers/hubs so we disable it for now
- #qemu_devicedel($vmid, $deviceid);
- #qemu_devicedelverify($vmid, $deviceid);
+ qemu_devicedel($vmid, $deviceid);
+ qemu_devicedelverify($vmid, $deviceid);
} elsif ($deviceid =~ m/^(virtio)(\d+)$/) {
my $device = parse_drive($deviceid, $conf->{$deviceid});
my $device = parse_drive($deviceid, $conf->{$deviceid});
qemu_devicedel($vmid, $deviceid);
+ qemu_devicedelverify($vmid, $deviceid);
qemu_drivedel($vmid, $deviceid);
qemu_deletescsihw($conf, $vmid, $deviceid);
return 1;
}
+# Adds a character device with the given $id to the running VM $vmid through the 'chardev-add'
+# monitor command, using a 'spicevmc' backend of type 'usbredir'.
+sub qemu_spice_usbredir_chardev_add {
+    my ($vmid, $id) = @_;
+
+    my $backend = {
+        type => 'spicevmc',
+        data => {
+            type => "usbredir",
+        },
+    };
+
+    mon_cmd($vmid, "chardev-add", id => $id, backend => $backend);
+}
+
sub qemu_deviceadd {
my ($vmid, $devicefull) = @_;
vm_deviceunplug($vmid, $conf, $deviceid);
# check if xhci controller is necessary and available
- if ($device->{usb3}) {
-
- my $devicelist = vm_devices_list($vmid);
+ my $devicelist = vm_devices_list($vmid);
- if (!$devicelist->{xhci}) {
- my $pciaddr = print_pci_addr("xhci", undef, $arch, $machine_type);
- qemu_deviceadd($vmid, "nec-usb-xhci,id=xhci$pciaddr");
- }
+ if (!$devicelist->{xhci}) {
+ my $pciaddr = print_pci_addr("xhci", undef, $arch, $machine_type);
+ qemu_deviceadd($vmid, PVE::QemuServer::USB::print_qemu_xhci_controller($pciaddr));
}
+
+ # print_usbdevice_full expects the parsed device
my $d = parse_usb_device($device->{host});
$d->{usb3} = $device->{usb3};
'tags' => 1,
};
+# register every cloud-init option key in $fast_plug_option
+$fast_plug_option->{$_} = 1 for keys %$confdesc_cloudinit;
+
# hotplug changes in [PENDING]
# $selection hash can be used to only apply specified options, for
# example: { cores => 1 } (only apply changed 'cores')
PVE::QemuConfig->write_config($vmid, $conf);
}
+ my $ostype = $conf->{ostype};
+ my $version = extract_version($machine_type, get_running_qemu_version($vmid));
my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
+ my $usb_hotplug = $hotplug_features->{usb}
+ && min_version($version, 7, 1)
+ && defined($ostype) && ($ostype eq 'l26' || windows_version($ostype) > 7);
my $cgroup = PVE::QemuServer::CGroup->new($vmid);
my $pending_delete_hash = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
vm_deviceunplug($vmid, $conf, 'tablet');
vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
}
- } elsif ($opt =~ m/^usb\d+/) {
- die "skip\n";
- # since we cannot reliably hot unplug usb devices we are disabling it
- #die "skip\n" if !$hotplug_features->{usb} || $conf->{$opt} =~ m/spice/i;
- #vm_deviceunplug($vmid, $conf, $opt);
+ } elsif ($opt =~ m/^usb(\d+)$/) {
+ my $index = $1;
+ die "skip\n" if !$usb_hotplug;
+ vm_deviceunplug($vmid, $conf, "usbredirdev$index"); # if it's a spice port
+ vm_deviceunplug($vmid, $conf, $opt);
} elsif ($opt eq 'vcpus') {
die "skip\n" if !$hotplug_features->{cpu};
qemu_cpu_hotplug($vmid, $conf, undef);
die "skip\n" if !$hotplug_features->{memory};
PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $opt);
} elsif ($opt eq 'cpuunits') {
- $cgroup->change_cpu_shares(undef, 1024);
+ $cgroup->change_cpu_shares(undef);
} elsif ($opt eq 'cpulimit') {
$cgroup->change_cpu_quota(undef, undef); # reset, cgroup module can better decide values
} else {
}
}
- my ($apply_pending_cloudinit, $apply_pending_cloudinit_done);
- $apply_pending_cloudinit = sub {
- return if $apply_pending_cloudinit_done; # once is enough
- $apply_pending_cloudinit_done = 1; # once is enough
-
- my ($key, $value) = @_;
-
- my @cloudinit_opts = keys %$confdesc_cloudinit;
- foreach my $opt (keys %{$conf->{pending}}) {
- next if !grep { $_ eq $opt } @cloudinit_opts;
- $conf->{$opt} = delete $conf->{pending}->{$opt};
- }
-
- my $pending_delete_hash = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
- foreach my $opt (sort keys %$pending_delete_hash) {
- next if !grep { $_ eq $opt } @cloudinit_opts;
- PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
- delete $conf->{$opt};
- }
-
- my $new_conf = { %$conf };
- $new_conf->{$key} = $value;
- PVE::QemuServer::Cloudinit::generate_cloudinitconfig($new_conf, $vmid);
- };
-
foreach my $opt (keys %{$conf->{pending}}) {
next if $selection && !$selection->{$opt};
my $value = $conf->{pending}->{$opt};
vm_deviceunplug($vmid, $conf, 'tablet');
vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
}
- } elsif ($opt =~ m/^usb\d+$/) {
- die "skip\n";
- # since we cannot reliably hot unplug usb devices we disable it for now
- #die "skip\n" if !$hotplug_features->{usb} || $value =~ m/spice/i;
- #my $d = eval { parse_property_string($usbdesc->{format}, $value) };
- #die "skip\n" if !$d;
- #qemu_usb_hotplug($storecfg, $conf, $vmid, $opt, $d, $arch, $machine_type);
+ } elsif ($opt =~ m/^usb(\d+)$/) {
+     my $index = $1;
+     die "skip\n" if !$usb_hotplug;
+     my $d = eval { parse_property_string($usbdesc->{format}, $value) };
+     # a value that cannot be parsed cannot be hotplugged; skip it instead of dereferencing
+     # an undefined result below
+     die "skip\n" if !$d;
+     # SPICE redirection ports are plugged under their own device ID. The keyword is matched
+     # case-insensitively so that e.g. 'SPICE' is not mistaken for a host device.
+     # NOTE(review): assumes the usb format accepts 'spice' in any case - confirm against
+     # parse_usb_device
+     my $is_spice = defined($d->{host}) && $d->{host} =~ m/^spice$/i;
+     my $id = $is_spice ? "usbredirdev$index" : $opt;
+     qemu_usb_hotplug($storecfg, $conf, $vmid, $id, $d, $arch, $machine_type);
} elsif ($opt eq 'vcpus') {
die "skip\n" if !$hotplug_features->{cpu};
qemu_cpu_hotplug($vmid, $conf, $value);
# some changes can be done without hotplug
my $drive = parse_drive($opt, $value);
if (drive_is_cloudinit($drive)) {
- &$apply_pending_cloudinit($opt, $value);
+ PVE::QemuServer::Cloudinit::generate_cloudinitconfig($conf, $vmid);
}
vmconfig_update_disk($storecfg, $conf, $hotplug_features->{disk},
$vmid, $opt, $value, $arch, $machine_type);
die "skip\n" if !$hotplug_features->{memory};
$value = PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $opt, $value);
} elsif ($opt eq 'cpuunits') {
- $cgroup->change_cpu_shares($conf->{pending}->{$opt}, 1024);
+ my $new_cpuunits = PVE::CGroup::clamp_cpu_shares($conf->{pending}->{$opt}); #clamp
+ $cgroup->change_cpu_shares($new_cpuunits);
} elsif ($opt eq 'cpulimit') {
my $cpulimit = $conf->{pending}->{$opt} == 0 ? -1 : int($conf->{pending}->{$opt} * 100000);
$cgroup->change_cpu_quota($cpulimit, 100000);
}
}
+ # unplug xhci controller if no usb device is left
+ if ($usb_hotplug) {
+ my $has_usb = 0;
+ for (my $i = 0; $i < $MAX_USB_DEVICES; $i++) {
+ next if !defined($conf->{"usb$i"});
+ $has_usb = 1;
+ last;
+ }
+ if (!$has_usb) {
+ vm_deviceunplug($vmid, $conf, 'xhci');
+ }
+ }
+
PVE::QemuConfig->write_config($vmid, $conf);
+
+ if($hotplug_features->{cloudinit}) {
+ my $pending = PVE::QemuServer::Cloudinit::get_pending_config($conf, $vmid);
+ my $regenerate = undef;
+ for my $item (@$pending) {
+ $regenerate = 1 if defined($item->{delete}) or defined($item->{pending});
+ }
+ PVE::QemuServer::vmconfig_update_cloudinit_drive($storecfg, $conf, $vmid) if $regenerate;
+ }
}
sub try_deallocate_drive {
sub vmconfig_apply_pending {
my ($vmid, $conf, $storecfg, $errors) = @_;
+ return if !scalar(keys %{$conf->{pending}});
+
my $add_apply_error = sub {
my ($opt, $msg) = @_;
my $err_msg = "unable to apply pending change $opt : $msg";
PVE::QemuConfig->cleanup_pending($conf);
+ my $generate_cloudnit = undef;
+
foreach my $opt (keys %{$conf->{pending}}) { # add/change
next if $opt eq 'delete'; # just to be sure
eval {
if (my $err = $@) {
$add_apply_error->($opt, $err);
} else {
+
+ if (is_valid_drivename($opt)) {
+ my $drive = parse_drive($opt, $conf->{pending}->{$opt});
+ $generate_cloudnit = 1 if drive_is_cloudinit($drive);
+ }
+
$conf->{$opt} = delete $conf->{pending}->{$opt};
}
}
# write all changes at once to avoid unnecessary i/o
PVE::QemuConfig->write_config($vmid, $conf);
+ PVE::QemuServer::Cloudinit::generate_cloudinitconfig($conf, $vmid) if $generate_cloudnit;
}
sub vmconfig_update_net {
vm_deviceplug($storecfg, $conf, $vmid, $opt, $drive, $arch, $machine_type);
}
+# Regenerates the cloud-init config of a VM that has a cloud-init drive and, if the VM is
+# running, replaces the medium of that drive: the drive is ejected (forced) and the path of
+# the cloud-init volume is inserted again through 'blockdev-change-medium'.
+#
+# Does nothing if the configuration contains no cloud-init drive.
+sub vmconfig_update_cloudinit_drive {
+    my ($storecfg, $conf, $vmid) = @_;
+
+    # remember key and parsed drive of the cloud-init volume (the last one visited wins)
+    my ($ci_key, $ci_drive);
+    my $remember_cloudinit = sub {
+        my ($ds, $drive) = @_;
+        if (PVE::QemuServer::drive_is_cloudinit($drive)) {
+            $ci_key = $ds;
+            $ci_drive = $drive;
+        }
+    };
+    PVE::QemuConfig->foreach_volume($conf, $remember_cloudinit);
+
+    return if !$ci_drive;
+
+    PVE::QemuServer::Cloudinit::generate_cloudinitconfig($conf, $vmid);
+
+    # only a running VM needs its medium swapped
+    if (PVE::QemuServer::check_running($vmid)) {
+        my $image_path = PVE::Storage::path($storecfg, $ci_drive->{file});
+        if ($image_path) {
+            mon_cmd($vmid, "eject", force => JSON::true, id => "$ci_key");
+            mon_cmd($vmid, "blockdev-change-medium", id => "$ci_key", filename => "$image_path");
+        }
+    }
+}
+
# called in locked context by incoming migration
sub vm_migrate_get_nbd_disks {
my ($storecfg, $conf, $replicated_volumes) = @_;
my ($ds, $drive) = @_;
return if drive_is_cdrom($drive);
+ return if $ds eq 'tpmstate0';
my $volid = $drive->{file};
sub vm_migrate_alloc_nbd_disks {
my ($storecfg, $vmid, $source_volumes, $storagemap) = @_;
- my $format = undef;
-
my $nbd = {};
foreach my $opt (sort keys %$source_volumes) {
- my ($volid, $storeid, $volname, $drive, $use_existing) = @{$source_volumes->{$opt}};
+ my ($volid, $storeid, $volname, $drive, $use_existing, $format) = @{$source_volumes->{$opt}};
if ($use_existing) {
$nbd->{$opt}->{drivestr} = print_drive($drive);
next;
}
- # If a remote storage is specified and the format of the original
- # volume is not available there, fall back to the default format.
- # Otherwise use the same format as the original.
+ # storage mapping + volname = regular migration
+ # storage mapping + format = remote migration
+ # order of precedence, filtered by whether storage supports it:
+ # 1. explicit requested format
+ # 2. format of current volume
+ # 3. default format of storage
if (!$storagemap->{identity}) {
- $storeid = map_storage($storagemap, $storeid);
+ $storeid = PVE::JSONSchema::map_id($storagemap, $storeid);
my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
- my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
- my $fileFormat = qemu_img_format($scfg, $volname);
- $format = (grep {$fileFormat eq $_} @{$validFormats}) ? $fileFormat : $defFormat;
+ if (!$format || !grep { $format eq $_ } @$validFormats) {
+ if ($volname) {
+ my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
+ my $fileFormat = qemu_img_format($scfg, $volname);
+ $format = $fileFormat
+ if grep { $fileFormat eq $_ } @$validFormats;
+ }
+ $format //= $defFormat;
+ }
} else {
+ # can't happen for remote migration, so $volname is always defined
my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
$format = qemu_img_format($scfg, $volname);
}
# network => CIDR of migration network
# type => secure/insecure - tunnel over encrypted connection or plain-text
# nbd_proto_version => int, 0 for TCP, 1 for UNIX
-# replicated_volumes = which volids should be re-used with bitmaps for nbd migration
+# replicated_volumes => which volids should be re-used with bitmaps for nbd migration
+# offline_volumes => new volids of offline migrated disks like tpmstate and cloudinit, not yet
+# contained in config
sub vm_start_nolock {
my ($storecfg, $vmid, $conf, $params, $migrate_opts) = @_;
# this way we can reuse the old ISO with the correct config
PVE::QemuServer::Cloudinit::generate_cloudinitconfig($conf, $vmid) if !$migratedfrom;
+ # override offline migrated volumes, conf is out of date still
+ if (my $offline_volumes = $migrate_opts->{offline_volumes}) {
+ for my $key (sort keys $offline_volumes->%*) {
+ my $parsed = parse_drive($key, $conf->{$key});
+ $parsed->{file} = $offline_volumes->{$key};
+ $conf->{$key} = print_drive($parsed);
+ }
+ }
+
my $defaults = load_defaults();
# set environment variable useful inside network script
$pci_devices->{$i} = parse_hostpci($dev);
}
- my $pci_id_list = [ map { $_->{id} } map { $_->{pciid}->@* } values $pci_devices->%* ];
+ # do not reserve pciid for mediated devices, sysfs will error out for duplicate assignment
+ my $real_pci_devices = [ grep { !(defined($_->{mdev}) && scalar($_->{pciid}->@*) == 1) } values $pci_devices->%* ];
+
+ # map to a flat list of pci ids
+ my $pci_id_list = [ map { $_->{id} } map { $_->{pciid}->@* } $real_pci_devices->@* ];
+
# reserve all PCI IDs before actually doing anything with them
PVE::QemuServer::PCI::reserve_pci_usage($pci_id_list, $vmid, $start_timeout);
eval {
+ my $uuid;
for my $id (sort keys %$pci_devices) {
my $d = $pci_devices->{$id};
for my $dev ($d->{pciid}->@*) {
- PVE::QemuServer::PCI::prepare_pci_device($vmid, $dev->{id}, $id, $d->{mdev});
+ my $info = PVE::QemuServer::PCI::prepare_pci_device($vmid, $dev->{id}, $id, $d->{mdev});
+
+ # nvidia grid needs the uuid of the mdev as qemu parameter
+ if ($d->{mdev} && !defined($uuid) && $info->{vendor} eq '10de') {
+ $uuid = PVE::QemuServer::PCI::generate_mdev_uuid($vmid, $id);
+ }
}
}
+ push @$cmd, '-uuid', $uuid if defined($uuid);
};
if (my $err = $@) {
- eval { PVE::QemuServer::PCI::remove_pci_reservation($pci_id_list) };
+ eval { cleanup_pci_devices($vmid, $conf) };
warn $@ if $@;
die $err;
}
};
# Issues with the above 'stop' not being fully completed are extremely rare, a very low
# timeout should be more than enough here...
- PVE::Systemd::wait_for_unit_removed("$vmid.scope", 5);
+ PVE::Systemd::wait_for_unit_removed("$vmid.scope", 20);
- my $cpuunits = get_cpuunits($conf);
+ my $cpuunits = PVE::CGroup::clamp_cpu_shares($conf->{cpuunits});
my %run_params = (
timeout => $statefile ? undef : $start_timeout,
);
if (PVE::CGroup::cgroup_mode() == 2) {
- $cpuunits = 10000 if $cpuunits >= 10000; # else we get an error
$systemd_properties{CPUWeight} = $cpuunits;
} else {
$systemd_properties{CPUShares} = $cpuunits;
if (my $err = $@) {
# deactivate volumes if start fails
eval { PVE::Storage::deactivate_volumes($storecfg, $vollist); };
- eval { PVE::QemuServer::PCI::remove_pci_reservation($pci_id_list) };
+ warn $@ if $@;
+ eval { cleanup_pci_devices($vmid, $conf) };
+ warn $@ if $@;
die "start failed: $err";
}
my $nicconf = parse_net($conf->{$opt});
qemu_set_link_status($vmid, $opt, 0) if $nicconf->{link_down};
}
+ add_nets_bridge_fdb($conf, $vmid);
}
mon_cmd($vmid, 'qom-set',
return $vollist;
}
+# Clean up the host PCI state of a VM.
+#
+# For every 'hostpciN' entry in $conf that has 'mdev' set, the mediated device with the
+# UUID derived from ($vmid, N) is removed via sysfs if it exists. Afterwards the PCI
+# reservation is removed for $vmid.
+#
+# $vmid - ID of the VM
+# $conf - VM configuration to scan for hostpciN entries
+sub cleanup_pci_devices {
+    my ($vmid, $conf) = @_;
+
+    for my $opt (keys $conf->%*) {
+        my ($index) = $opt =~ m/^hostpci(\d+)$/ or next;
+
+        my $mdev_uuid = PVE::SysFSTools::generate_mdev_uuid($vmid, $index);
+        my $hostpci = parse_hostpci($conf->{$opt});
+        next if !$hostpci->{mdev};
+
+        # NOTE: avoid PVE::SysFSTools::pci_cleanup_mdev_device as it requires the PCI ID and
+        # we don't want to break the ABI just for these two lines
+        my $sysfs_dir = "/sys/bus/mdev/devices/$mdev_uuid";
+        PVE::SysFSTools::file_write("$sysfs_dir/remove", "1") if -e $sysfs_dir;
+    }
+
+    PVE::QemuServer::PCI::remove_pci_reservation($vmid);
+}
+
sub vm_stop_cleanup {
my ($storecfg, $vmid, $conf, $keepActive, $apply_pending_changes) = @_;
unlink '/dev/shm/pve-shm-' . ($ivshmem->{name} // $vmid);
}
- my $ids = [];
- foreach my $key (keys %$conf) {
- next if $key !~ m/^hostpci(\d+)$/;
- my $hostpciindex = $1;
- my $d = parse_hostpci($conf->{$key});
- my $uuid = PVE::SysFSTools::generate_mdev_uuid($vmid, $hostpciindex);
-
- foreach my $pci (@{$d->{pciid}}) {
- my $pciid = $pci->{id};
- push @$ids, $pci->{id};
- PVE::SysFSTools::pci_cleanup_mdev_device($pciid, $uuid);
- }
- }
- PVE::QemuServer::PCI::remove_pci_reservation($ids);
+ cleanup_pci_devices($vmid, $conf);
vmconfig_apply_pending($vmid, $conf, $storecfg) if $apply_pending_changes;
};
my $res = mon_cmd($vmid, 'query-status');
my $resume_cmd = 'cont';
my $reset = 0;
+ my $conf = PVE::QemuConfig->load_config($vmid);
if ($res->{status}) {
return if $res->{status} eq 'running'; # job done, go home
if (!$nocheck) {
- my $conf = PVE::QemuConfig->load_config($vmid);
-
PVE::QemuConfig->check_lock($conf)
if !($skiplock || PVE::QemuConfig->has_lock($conf, 'backup'));
}
# request before the backup finishes for example
mon_cmd($vmid, "system_reset");
}
+
+ add_nets_bridge_fdb($conf, $vmid) if $resume_cmd eq 'cont';
+
mon_cmd($vmid, $resume_cmd);
});
}
my $restore_cleanup_oldconf = sub {
my ($storecfg, $vmid, $oldconf, $virtdev_hash) = @_;
+ my $kept_disks = {};
+
PVE::QemuConfig->foreach_volume($oldconf, sub {
my ($ds, $drive) = @_;
if (my $err = $@) {
warn $err;
}
+ } else {
+ $kept_disks->{$volid} = 1;
}
});
- # delete vmstate files, after the restore we have no snapshots anymore
- foreach my $snapname (keys %{$oldconf->{snapshots}}) {
+ # after the restore we have no snapshots anymore
+ for my $snapname (keys $oldconf->{snapshots}->%*) {
my $snap = $oldconf->{snapshots}->{$snapname};
if ($snap->{vmstate}) {
eval { PVE::Storage::vdisk_free($storecfg, $snap->{vmstate}); };
warn $err;
}
}
+
+ for my $volid (keys $kept_disks->%*) {
+ eval { PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snapname); };
+ warn $@ if $@;
+ }
}
};
my $parse_backup_hints = sub {
my ($rpcenv, $user, $storecfg, $fh, $devinfo, $options) = @_;
- my $virtdev_hash = {};
+ my $check_storage = sub { # assert if an image can be allocate
+ my ($storeid, $scfg) = @_;
+ die "Content type 'images' is not available on storage '$storeid'\n"
+ if !$scfg->{content}->{images};
+ $rpcenv->check($user, "/storage/$storeid", ['Datastore.AllocateSpace'])
+ if $user ne 'root@pam';
+ };
+ my $virtdev_hash = {};
while (defined(my $line = <$fh>)) {
if ($line =~ m/^\#qmdump\#map:(\S+):(\S+):(\S*):(\S*):$/) {
my ($virtdev, $devname, $storeid, $format) = ($1, $2, $3, $4);
$devinfo->{$devname}->{format} = $format;
$devinfo->{$devname}->{storeid} = $storeid;
- # check permission on storage
- my $pool = $options->{pool}; # todo: do we need that?
- if ($user ne 'root@pam') {
- $rpcenv->check($user, "/storage/$storeid", ['Datastore.AllocateSpace']);
- }
+ my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
+ $check_storage->($storeid, $scfg); # permission and content type check
$virtdev_hash->{$virtdev} = $devinfo->{$devname};
} elsif ($line =~ m/^((?:ide|sata|scsi)\d+):\s*(.*)\s*$/) {
my $virtdev = $1;
my $drive = parse_drive($virtdev, $2);
+
if (drive_is_cloudinit($drive)) {
my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
$storeid = $options->{storage} if defined ($options->{storage});
my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
my $format = qemu_img_format($scfg, $volname); # has 'raw' fallback
+ $check_storage->($storeid, $scfg); # permission and content type check
+
$virtdev_hash->{$virtdev} = {
format => $format,
storeid => $storeid,
}
my $restore_deactivate_volumes = sub {
- my ($storecfg, $devinfo) = @_;
+ my ($storecfg, $virtdev_hash) = @_;
my $vollist = [];
- foreach my $devname (keys %$devinfo) {
- my $volid = $devinfo->{$devname}->{volid};
- push @$vollist, $volid if $volid;
+ for my $dev (values $virtdev_hash->%*) {
+ push $vollist->@*, $dev->{volid} if $dev->{volid};
}
- PVE::Storage::deactivate_volumes($storecfg, $vollist);
+ eval { PVE::Storage::deactivate_volumes($storecfg, $vollist); };
+ print STDERR $@ if $@;
};
my $restore_destroy_volumes = sub {
- my ($storecfg, $devinfo) = @_;
+ my ($storecfg, $virtdev_hash) = @_;
- foreach my $devname (keys %$devinfo) {
- my $volid = $devinfo->{$devname}->{volid};
- next if !$volid;
+ for my $dev (values $virtdev_hash->%*) {
+ my $volid = $dev->{volid} or next;
eval {
- if ($volid =~ m|^/|) {
- unlink $volid || die 'unlink failed\n';
- } else {
- PVE::Storage::vdisk_free($storecfg, $volid);
- }
+ PVE::Storage::vdisk_free($storecfg, $volid);
print STDERR "temporary volume '$volid' sucessfuly removed\n";
};
print STDERR "unable to cleanup '$volid' - $@" if $@;
}
};
+# Parse the raw config taken from a backup and overlay the given overrides on top.
+#
+# $filename        - passed through to parse_vm_config together with the raw config
+# $backup_conf_raw - raw config text from the backup
+# $override_conf   - hash of config keys; each one replaces the parsed value of that key
+#
+# Returns the parsed config hash with all override keys applied.
+my $restore_merge_config = sub {
+    my ($filename, $backup_conf_raw, $override_conf) = @_;
+
+    my $merged = parse_vm_config($filename, $backup_conf_raw);
+
+    # overrides always win over what the backup contained
+    $merged->{$_} = $override_conf->{$_} for keys $override_conf->%*;
+
+    return $merged;
+};
+
sub scan_volids {
my ($cfg, $vmid) = @_;
my $keyfile = PVE::Storage::PBSPlugin::pbs_encryption_key_file_name($storecfg, $storeid);
my $repo = PVE::PBSClient::get_repository($scfg);
+ my $namespace = $scfg->{namespace};
# This is only used for `pbs-restore` and the QEMU PBS driver (live-restore)
my $password = PVE::Storage::PBSPlugin::pbs_get_password($scfg, $storeid);
my $new_conf_raw = '';
my $rpcenv = PVE::RPCEnvironment::get();
- my $devinfo = {};
+ my $devinfo = {}; # info about drives included in backup
+ my $virtdev_hash = {}; # info about allocated drives
eval {
# enable interrupts
my $index = PVE::Tools::file_get_contents($index_fn);
$index = decode_json($index);
- # print Dumper($index);
foreach my $info (@{$index->{files}}) {
if ($info->{filename} =~ m/^(drive-\S+).img.fidx$/) {
my $devname = $1;
my $fh = IO::File->new($cfgfn, "r") ||
die "unable to read qemu-server.conf - $!\n";
- my $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $storecfg, $fh, $devinfo, $options);
+ $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $storecfg, $fh, $devinfo, $options);
# fixme: rate limit?
# for live-restore we only want to preload the efidisk and TPM state
next if $options->{live} && $virtdev ne 'efidisk0' && $virtdev ne 'tpmstate0';
+ my @ns_arg;
+ if (defined(my $ns = $scfg->{namespace})) {
+ @ns_arg = ('--ns', $ns);
+ }
+
my $pbs_restore_cmd = [
'/usr/bin/pbs-restore',
'--repository', $repo,
+ @ns_arg,
$pbs_backup_name,
"$d->{devname}.img.fidx",
$path,
my $err = $@;
if ($err || !$options->{live}) {
- $restore_deactivate_volumes->($storecfg, $devinfo);
+ $restore_deactivate_volumes->($storecfg, $virtdev_hash);
}
rmtree $tmpdir;
if ($err) {
- $restore_destroy_volumes->($storecfg, $devinfo);
+ $restore_destroy_volumes->($storecfg, $virtdev_hash);
die $err;
}
$new_conf_raw .= "\nlock: create";
}
- PVE::Tools::file_set_contents($conffile, $new_conf_raw);
-
- PVE::Cluster::cfs_update(); # make sure we read new file
+ my $new_conf = $restore_merge_config->($conffile, $new_conf_raw, $options->{override_conf});
+ PVE::QemuConfig->write_config($vmid, $new_conf);
eval { rescan($vmid, 1); };
warn $@ if $@;
# these special drives are already restored before start
delete $devinfo->{'drive-efidisk0'};
delete $devinfo->{'drive-tpmstate0-backup'};
- pbs_live_restore($vmid, $conf, $storecfg, $devinfo, $repo, $keyfile, $pbs_backup_name);
+
+ my $pbs_opts = {
+ repo => $repo,
+ keyfile => $keyfile,
+ snapshot => $pbs_backup_name,
+ namespace => $namespace,
+ };
+ pbs_live_restore($vmid, $conf, $storecfg, $devinfo, $pbs_opts);
PVE::QemuConfig->remove_lock($vmid, "create");
}
}
sub pbs_live_restore {
- my ($vmid, $conf, $storecfg, $restored_disks, $repo, $keyfile, $snap) = @_;
+ my ($vmid, $conf, $storecfg, $restored_disks, $opts) = @_;
print "starting VM for live-restore\n";
- print "repository: '$repo', snapshot: '$snap'\n";
+ print "repository: '$opts->{repo}', snapshot: '$opts->{snapshot}'\n";
my $pbs_backing = {};
for my $ds (keys %$restored_disks) {
$ds =~ m/^drive-(.*)$/;
my $confname = $1;
$pbs_backing->{$confname} = {
- repository => $repo,
- snapshot => $snap,
+ repository => $opts->{repo},
+ snapshot => $opts->{snapshot},
archive => "$ds.img.fidx",
};
- $pbs_backing->{$confname}->{keyfile} = $keyfile if -e $keyfile;
+ $pbs_backing->{$confname}->{keyfile} = $opts->{keyfile} if -e $opts->{keyfile};
+ $pbs_backing->{$confname}->{namespace} = $opts->{namespace} if defined($opts->{namespace});
my $drive = parse_drive($confname, $conf->{$confname});
print "restoring '$ds' to '$drive->{file}'\n";
my $err = $@;
if ($err) {
- warn "An error occured during live-restore: $err\n";
+ warn "An error occurred during live-restore: $err\n";
_do_vm_stop($storecfg, $vmid, 1, 1, 10, 0, 1);
die "live-restore failed\n";
}
my $oldtimeout;
my $timeout = 5;
- my $devinfo = {};
+ my $devinfo = {}; # info about drives included in backup
+ my $virtdev_hash = {}; # info about allocated drives
my $rpcenv = PVE::RPCEnvironment::get();
PVE::Tools::file_copy($fwcfgfn, "${pve_firewall_dir}/$vmid.fw");
}
- my $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $cfg, $fh, $devinfo, $opts);
+ $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $cfg, $fh, $devinfo, $opts);
foreach my $info (values %{$virtdev_hash}) {
my $storeid = $info->{storeid};
alarm($oldtimeout) if $oldtimeout;
- $restore_deactivate_volumes->($cfg, $devinfo);
+ $restore_deactivate_volumes->($cfg, $virtdev_hash);
close($fifofh) if $fifofh;
unlink $mapfifo;
rmtree $tmpdir;
if ($err) {
- $restore_destroy_volumes->($cfg, $devinfo);
+ $restore_destroy_volumes->($cfg, $virtdev_hash);
die $err;
}
- PVE::Tools::file_set_contents($conffile, $new_conf_raw);
-
- PVE::Cluster::cfs_update(); # make sure we read new file
+ my $new_conf = $restore_merge_config->($conffile, $new_conf_raw, $opts->{override_conf});
+ PVE::QemuConfig->write_config($vmid, $new_conf);
eval { rescan($vmid, 1); };
warn $@ if $@;
sub restore_tar_archive {
my ($archive, $vmid, $user, $opts) = @_;
+ if (scalar(keys $opts->{override_conf}->%*) > 0) {
+ my $keystring = join(' ', keys $opts->{override_conf}->%*);
+ die "cannot pass along options ($keystring) when restoring from tar archive\n";
+ }
+
if ($archive ne '-') {
my $firstfile = tar_archive_read_firstfile($archive);
die "ERROR: file '$archive' does not look like a QemuServer vzdump backup\n"
$src_path = PVE::Storage::path($storecfg, $src_volid, $snapname);
$src_is_iscsi = ($src_path =~ m|^iscsi://|);
$cachemode = 'none' if $src_scfg->{type} eq 'zfspool';
- } elsif (-f $src_volid) {
+ } elsif (-f $src_volid || -b $src_volid) {
$src_path = $src_volid;
if ($src_path =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/) {
$src_format = $1;
if ($agent_running) {
print "freeze filesystem\n";
eval { mon_cmd($vmid, "guest-fsfreeze-freeze"); };
+ warn $@ if $@;
} else {
print "suspend vm\n";
eval { PVE::QemuServer::vm_suspend($vmid, 1); };
+ warn $@ if $@;
}
# if we clone a disk for a new target vm, we don't switch the disk
if ($agent_running) {
print "unfreeze filesystem\n";
eval { mon_cmd($vmid, "guest-fsfreeze-thaw"); };
+ warn $@ if $@;
} else {
print "resume vm\n";
- eval { PVE::QemuServer::vm_resume($vmid, 1, 1); };
+ eval { PVE::QemuServer::vm_resume($vmid, 1, 1); };
+ warn $@ if $@;
}
last;
}
sub clone_disk {
- my ($storecfg, $vmid, $running, $drivename, $drive, $snapname,
- $newvmid, $storage, $format, $full, $newvollist, $jobs, $completion, $qga, $bwlimit, $conf) = @_;
+ my ($storecfg, $source, $dest, $full, $newvollist, $jobs, $completion, $qga, $bwlimit) = @_;
+
+ my ($vmid, $running) = $source->@{qw(vmid running)};
+ my ($src_drivename, $drive, $snapname) = $source->@{qw(drivename drive snapname)};
+
+ my ($newvmid, $dst_drivename, $efisize) = $dest->@{qw(vmid drivename efisize)};
+ my ($storage, $format) = $dest->@{qw(storage format)};
+
+ my $use_drive_mirror = $full && $running && $src_drivename && !$snapname;
+
+ if ($src_drivename && $dst_drivename && $src_drivename ne $dst_drivename) {
+ die "cloning from/to EFI disk requires EFI disk\n"
+ if $src_drivename eq 'efidisk0' || $dst_drivename eq 'efidisk0';
+ die "cloning from/to TPM state requires TPM state\n"
+ if $src_drivename eq 'tpmstate0' || $dst_drivename eq 'tpmstate0';
+
+ # This would lead to two device nodes in QEMU pointing to the same backing image!
+ die "cannot change drive name when cloning disk from/to the same VM\n"
+ if $use_drive_mirror && $vmid == $newvmid;
+ }
+
+ die "cannot move TPM state while VM is running\n"
+ if $use_drive_mirror && $src_drivename eq 'tpmstate0';
my $newvolid;
+ print "create " . ($full ? 'full' : 'linked') . " clone of drive ";
+ print "$src_drivename " if $src_drivename;
+ print "($drive->{file})\n";
+
if (!$full) {
- print "create linked clone of drive $drivename ($drive->{file})\n";
$newvolid = PVE::Storage::vdisk_clone($storecfg, $drive->{file}, $newvmid, $snapname);
push @$newvollist, $newvolid;
} else {
my $dst_format = resolve_dst_disk_format($storecfg, $storeid, $volname, $format);
- print "create full clone of drive $drivename ($drive->{file})\n";
my $name = undef;
my $size = undef;
if (drive_is_cloudinit($drive)) {
}
$snapname = undef;
$size = PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE;
- } elsif ($drivename eq 'efidisk0') {
- $size = get_efivars_size($conf);
- } elsif ($drivename eq 'tpmstate0') {
+ } elsif ($dst_drivename eq 'efidisk0') {
+ $size = $efisize or die "internal error - need to specify EFI disk size\n";
+ } elsif ($dst_drivename eq 'tpmstate0') {
+ $dst_format = 'raw';
$size = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
} else {
($size) = PVE::Storage::volume_size_info($storecfg, $drive->{file}, 10);
}
my $sparseinit = PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $newvolid);
- if (!$running || $snapname) {
+ if ($use_drive_mirror) {
+ qemu_drive_mirror($vmid, $src_drivename, $newvolid, $newvmid, $sparseinit, $jobs,
+ $completion, $qga, $bwlimit);
+ } else {
# TODO: handle bwlimits
- if ($drivename eq 'efidisk0') {
+ if ($dst_drivename eq 'efidisk0') {
# the relevant data on the efidisk may be smaller than the source
# e.g. on RBD/ZFS, so we use dd to copy only the amount
# that is given by the OVMF_VARS.fd
- my $src_path = PVE::Storage::path($storecfg, $drive->{file});
+ my $src_path = PVE::Storage::path($storecfg, $drive->{file}, $snapname);
my $dst_path = PVE::Storage::path($storecfg, $newvolid);
+ my $src_format = (PVE::Storage::parse_volname($storecfg, $drive->{file}))[6];
+
# better for Ceph if block size is not too small, see bug #3324
my $bs = 1024*1024;
- run_command(['qemu-img', 'dd', '-n', '-O', $dst_format, "bs=$bs", "osize=$size",
- "if=$src_path", "of=$dst_path"]);
+ my $cmd = ['qemu-img', 'dd', '-n', '-O', $dst_format];
+
+ if ($src_format eq 'qcow2' && $snapname) {
+ die "cannot clone qcow2 EFI disk snapshot - requires QEMU >= 6.2\n"
+ if !min_version(kvm_user_version(), 6, 2);
+ push $cmd->@*, '-l', $snapname;
+ }
+ push $cmd->@*, "bs=$bs", "osize=$size", "if=$src_path", "of=$dst_path";
+ run_command($cmd);
} else {
qemu_img_convert($drive->{file}, $newvolid, $size, $snapname, $sparseinit);
}
- } else {
-
- die "cannot move TPM state while VM is running\n" if $drivename eq 'tpmstate0';
-
- my $kvmver = get_running_qemu_version ($vmid);
- if (!min_version($kvmver, 2, 7)) {
- die "drive-mirror with iothread requires qemu version 2.7 or higher\n"
- if $drive->{iothread};
- }
-
- qemu_drive_mirror($vmid, $drivename, $newvolid, $newvmid, $sparseinit, $jobs,
- $completion, $qga, $bwlimit);
}
}
no_data_clone:
my ($size) = eval { PVE::Storage::volume_size_info($storecfg, $newvolid, 10) };
- my $disk = $drive;
- $disk->{format} = undef;
+ my $disk = dclone($drive);
+ delete $disk->{format};
$disk->{file} = $newvolid;
$disk->{size} = $size if defined($size);
}
sub get_efivars_size {
- my ($conf) = @_;
+ my ($conf, $efidisk) = @_;
+
my $arch = get_vm_arch($conf);
- my $efidisk = $conf->{efidisk0} ? parse_drive('efidisk0', $conf->{efidisk0}) : undef;
+ $efidisk //= $conf->{efidisk0} ? parse_drive('efidisk0', $conf->{efidisk0}) : undef;
my $smm = PVE::QemuServer::Machine::machine_type_is_q35($conf);
my (undef, $ovmf_vars) = get_ovmf_files($arch, $efidisk, $smm);
die "uefi vars image '$ovmf_vars' not found\n" if ! -f $ovmf_vars;
return ($maxdev, $controller, $controller_prefix);
}
-sub windows_version {
- my ($ostype) = @_;
-
- return 0 if !$ostype;
-
- my $winversion = 0;
-
- if($ostype eq 'wxp' || $ostype eq 'w2k3' || $ostype eq 'w2k') {
- $winversion = 5;
- } elsif($ostype eq 'w2k8' || $ostype eq 'wvista') {
- $winversion = 6;
- } elsif ($ostype =~ m/^win(\d+)$/) {
- $winversion = $1;
- }
-
- return $winversion;
-}
-
sub resolve_dst_disk_format {
my ($storecfg, $storeid, $src_volname, $format) = @_;
my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
return 1;
}
+# Add the MAC address of each of a VM's network devices to the bridge forwarding DB.
+#
+# $conf - VM configuration; every 'netN' key is handled
+# $vmid - ID of the VM, used to build the tap interface name "tap<vmid>i<N>"
+#
+# Devices whose config cannot be parsed are skipped. Devices without a MAC address are
+# skipped as well, with a warning if the 'learning' attribute of the interface's bridge
+# port reads as false.
+sub add_nets_bridge_fdb {
+    my ($conf, $vmid) = @_;
+
+    for my $opt (keys $conf->%*) {
+        my ($index) = $opt =~ m/^net(\d+)$/ or next;
+        my $iface = "tap${vmid}i${index}";
+
+        # NOTE: expect setups with learning off to *not* use auto-random-generation of MAC on start
+        my $net = parse_net($conf->{$opt}, 1) or next;
+
+        if (my $mac = $net->{macaddr}) {
+            if ($have_sdn) {
+                PVE::Network::SDN::Zones::add_bridge_fdb($iface, $mac, $net->{bridge}, $net->{firewall});
+            } else {
+                PVE::Network::add_bridge_fdb($iface, $mac, $net->{firewall});
+            }
+        } elsif (!file_read_firstline("/sys/class/net/$iface/brport/learning")) {
+            # nothing to add, and the bridge will not learn the address on its own
+            log_warn("MAC learning disabled, but vNIC '$iface' has no static MAC to add to forwarding DB!");
+        }
+    }
+}
+
+# Remove the MAC address of each of a VM's network devices from the bridge forwarding DB.
+#
+# $conf - VM configuration; every 'netN' key is handled
+# $vmid - ID of the VM, used to build the tap interface name "tap<vmid>i<N>"
+#
+# Devices whose config cannot be parsed or that have no MAC address are skipped.
+sub del_nets_bridge_fdb {
+    my ($conf, $vmid) = @_;
+
+    for my $opt (keys %$conf) {
+        next if $opt !~ m/^net(\d+)$/;
+        my $iface = "tap${vmid}i$1";
+
+        # Pass the same second argument as add_nets_bridge_fdb so that both operate on the
+        # MAC that is actually configured.
+        # NOTE(review): presumably the flag suppresses automatic MAC generation in parse_net;
+        # without it the 'or next' below may never trigger and a MAC that was never added
+        # would be removed - confirm against parse_net.
+        my $net = parse_net($conf->{$opt}, 1) or next;
+        my $mac = $net->{macaddr} or next;
+
+        if ($have_sdn) {
+            PVE::Network::SDN::Zones::del_bridge_fdb($iface, $mac, $net->{bridge}, $net->{firewall});
+        } else {
+            PVE::Network::del_bridge_fdb($iface, $mac, $net->{firewall});
+        }
+    }
+}
+
1;