use PVE::CGroup;
use PVE::DataCenterConfig;
use PVE::Exception qw(raise raise_param_exc);
+use PVE::Format qw(render_duration render_bytes);
use PVE::GuestHelpers qw(safe_string_ne safe_num_ne safe_boolean_ne);
use PVE::INotify;
use PVE::JSONSchema qw(get_standard_option parse_property_string);
The special string 'dhcp' can be used for IP addresses to use DHCP, in which case no explicit
gateway should be provided.
-For IPv6 the special string 'auto' can be used to use stateless autoconfiguration.
+For IPv6 the special string 'auto' can be used to use stateless autoconfiguration. This requires
+cloud-init 19.4 or newer.
If cloud-init is enabled and neither an IPv4 nor an IPv6 address is specified, it defaults to using
dhcp on IPv4.
return "usb-kbd,id=keyboard,bus=ehci.0,port=2";
}
+# Build the identifier of a drive: its interface name immediately followed
+# by its index (interface 'virtio' with index 0 yields 'virtio0').
+my sub get_drive_id {
+    my ($drive) = @_;
+    my ($interface, $index) = ($drive->{interface}, $drive->{index});
+    return $interface . $index;
+}
+
sub print_drivedevice_full {
my ($storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type) = @_;
my $device = '';
my $maxdev = 0;
- my $drive_id = "$drive->{interface}$drive->{index}";
+ my $drive_id = get_drive_id($drive);
if ($drive->{interface} eq 'virtio') {
my $pciaddr = print_pci_addr("$drive_id", $bridges, $arch, $machine_type);
$device = "virtio-blk-pci,drive=drive-$drive_id,id=${drive_id}${pciaddr}";
}
sub print_drive_commandline_full {
- my ($storecfg, $vmid, $drive) = @_;
+ my ($storecfg, $vmid, $drive, $pbs_name) = @_;
my $path;
my $volid = $drive->{file};
- my $format;
+ my $format = $drive->{format};
+ my $drive_id = get_drive_id($drive);
if (drive_is_cdrom($drive)) {
$path = get_iso_path($storecfg, $vmid, $volid);
+ die "$drive_id: cannot back cdrom drive with PBS snapshot\n" if $pbs_name;
} else {
my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
if ($storeid) {
$path = PVE::Storage::path($storecfg, $volid);
my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
- $format = qemu_img_format($scfg, $volname);
+ $format //= qemu_img_format($scfg, $volname);
} else {
$path = $volid;
- $format = "raw";
+ $format //= "raw";
}
}
+ my $is_rbd = $path =~ m/^rbd:/;
+
my $opts = '';
- my @qemu_drive_options = qw(heads secs cyls trans media format cache rerror werror aio discard);
+ my @qemu_drive_options = qw(heads secs cyls trans media cache rerror werror aio discard);
foreach my $o (@qemu_drive_options) {
$opts .= ",$o=$drive->{$o}" if defined($drive->{$o});
}
}
}
- $opts .= ",format=$format" if $format && !$drive->{format};
+ if ($pbs_name) {
+ $format = "rbd" if $is_rbd;
+ die "$drive_id: Proxmox Backup Server backed drive cannot auto-detect the format\n"
+ if !$format;
+ $opts .= ",format=alloc-track,file.driver=$format";
+ } elsif ($format) {
+ $opts .= ",format=$format";
+ }
my $cache_direct = 0;
# This used to be our default with discard not being specified:
$detectzeroes = 'on';
}
- $opts .= ",detect-zeroes=$detectzeroes" if $detectzeroes;
+
+ # note: 'detect-zeroes' works per blockdev and we want it to persist
+ # after the alloc-track is removed, so put it on 'file' directly
+ my $dz_param = $pbs_name ? "file.detect-zeroes" : "detect-zeroes";
+ $opts .= ",$dz_param=$detectzeroes" if $detectzeroes;
}
- my $pathinfo = $path ? "file=$path," : '';
+ if ($pbs_name) {
+ $opts .= ",backing=$pbs_name";
+ $opts .= ",auto-remove=on";
+ }
+
+ # my $file_param = $pbs_name ? "file.file.filename" : "file";
+ my $file_param = "file";
+ if ($pbs_name) {
+ # non-rbd drivers require the underlying file to be a separate block
+ # node, so add a second .file indirection
+ $file_param .= ".file" if !$is_rbd;
+ $file_param .= ".filename";
+ }
+ my $pathinfo = $path ? "$file_param=$path," : '';
return "${pathinfo}if=none,id=drive-$drive->{interface}$drive->{index}$opts";
}
+# Assemble the option string describing a read-only 'pbs' driver block node.
+#
+# $pbs_conf - hash with the keys 'repository', 'snapshot' and 'archive';
+#             'keyfile' is only emitted when it holds a true value
+# $pbs_name - value used for 'node-name'
+#
+# Returns the comma separated key=value list as a single string.
+sub print_pbs_blockdev {
+    my ($pbs_conf, $pbs_name) = @_;
+
+    my @options = (
+        "driver=pbs",
+        "node-name=$pbs_name",
+        "read-only=on",
+        "repository=$pbs_conf->{repository}",
+        "snapshot=$pbs_conf->{snapshot}",
+        "archive=$pbs_conf->{archive}",
+    );
+
+    # the key file is optional
+    if ($pbs_conf->{keyfile}) {
+        push @options, "keyfile=$pbs_conf->{keyfile}";
+    }
+
+    return join(',', @options);
+}
+
sub print_netdevice_full {
my ($vmid, $conf, $net, $netid, $bridges, $use_old_bios_files, $arch, $machine_type) = @_;
});
if ($purge_unreferenced) { # also remove unreferenced disk
- my $vmdisks = PVE::Storage::vdisk_list($storecfg, undef, $vmid);
+ my $vmdisks = PVE::Storage::vdisk_list($storecfg, undef, $vmid, undef, 'images');
PVE::Storage::foreach_volid($vmdisks, sub {
my ($volid, $sid, $volname, $d) = @_;
eval { PVE::Storage::vdisk_free($storecfg, $volid) };
}
}
- if ($add_pve_version && $machine !~ m/\+pve\d+$/) {
+ if ($add_pve_version && $machine !~ m/\+pve\d+?(?:\.pxe)?$/) {
+ my $is_pxe = $machine =~ m/^(.*?)\.pxe$/;
+ $machine = $1 if $is_pxe;
+
# for version-pinned machines that do not include a pve-version (e.g.
# pc-q35-4.1), we assume 0 to keep them stable in case we bump
$machine .= '+pve0';
+
+ $machine .= '.pxe' if $is_pxe;
}
return $machine;
}
sub config_to_command {
- my ($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu) = @_;
+ my ($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu,
+ $pbs_backing) = @_;
my $cmd = [];
my $globalFlags = [];
$ahcicontroller->{$controller}=1;
}
- my $drive_cmd = print_drive_commandline_full($storecfg, $vmid, $drive);
+ my $pbs_conf = $pbs_backing->{$ds};
+ my $pbs_name = undef;
+ if ($pbs_conf) {
+ $pbs_name = "drive-$ds-pbs";
+ push @$devices, '-blockdev', print_pbs_blockdev($pbs_conf, $pbs_name);
+ }
+
+ my $drive_cmd = print_drive_commandline_full($storecfg, $vmid, $drive, $pbs_name);
$drive_cmd .= ',readonly' if PVE::QemuConfig->is_template($conf);
push @$devices, '-drive',$drive_cmd;
$volhash->{$volid}->{is_unused} //= 0;
$volhash->{$volid}->{is_unused} = 1 if $key =~ /^unused\d+$/;
+
+ $volhash->{$volid}->{drivename} = $key if is_valid_drivename($key);
};
my $include_opts = {
# timeout => in seconds
# paused => start VM in paused state (backup)
# resume => resume from hibernation
+# pbs-backing => {
+# sata0 => {
+# repository
+# snapshot
+# keyfile
+# archive
+# },
+# virtio2 => ...
+# }
# migrate_opts:
# nbd => volumes for NBD exports (vm_migrate_alloc_nbd_disks)
# migratedfrom => source node
print "Resuming suspended VM\n";
}
- my ($cmd, $vollist, $spice_port) =
- config_to_command($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu);
+ my ($cmd, $vollist, $spice_port) = config_to_command($storecfg, $vmid,
+ $conf, $defaults, $forcemachine, $forcecpu, $params->{'pbs-backing'});
my $migration_ip;
my $get_migration_ip = sub {
return $map;
};
-my $restore_update_config_line = sub {
- my ($cookie, $vmid, $map, $line, $unique) = @_;
+sub restore_update_config_line {
+ my ($cookie, $map, $line, $unique) = @_;
return '' if $line =~ m/^\#qmdump\#/;
return '' if $line =~ m/^\#vzdump\#/;
}
return $res;
-};
+}
my $restore_deactivate_volumes = sub {
my ($storecfg, $devinfo) = @_;
}
};
+# FIXME For PVE 7.0, remove $content_type and always use 'images'
sub scan_volids {
- my ($cfg, $vmid) = @_;
+ my ($cfg, $vmid, $content_type) = @_;
- my $info = PVE::Storage::vdisk_list($cfg, undef, $vmid);
+ my $info = PVE::Storage::vdisk_list($cfg, undef, $vmid, undef, $content_type);
my $volid_hash = {};
foreach my $storeid (keys %$info) {
my $cfg = PVE::Storage::config();
- # FIXME: Remove once our RBD plugin can handle CT and VM on a single storage
- # see: https://pve.proxmox.com/pipermail/pve-devel/2018-July/032900.html
- foreach my $stor (keys %{$cfg->{ids}}) {
- delete($cfg->{ids}->{$stor}) if ! $cfg->{ids}->{$stor}->{content}->{images};
- }
-
print "rescan volumes...\n";
- my $volid_hash = scan_volids($cfg, $vmid);
+ my $volid_hash = scan_volids($cfg, $vmid, 'images');
my $updatefn = sub {
my ($vmid) = @_;
my $repo = PVE::PBSClient::get_repository($scfg);
- # This is only used for `pbs-restore`!
+ # This is only used for `pbs-restore` and the QEMU PBS driver (live-restore)
my $password = PVE::Storage::PBSPlugin::pbs_get_password($scfg, $storeid);
local $ENV{PBS_PASSWORD} = $password;
local $ENV{PBS_FINGERPRINT} = $fingerprint if defined($fingerprint);
# allocate volumes
my $map = $restore_allocate_devices->($storecfg, $virtdev_hash, $vmid);
- foreach my $virtdev (sort keys %$virtdev_hash) {
- my $d = $virtdev_hash->{$virtdev};
- next if $d->{is_cloudinit}; # no need to restore cloudinit
+ if (!$options->{live}) {
+ foreach my $virtdev (sort keys %$virtdev_hash) {
+ my $d = $virtdev_hash->{$virtdev};
+ next if $d->{is_cloudinit}; # no need to restore cloudinit
- my $volid = $d->{volid};
+ my $volid = $d->{volid};
- my $path = PVE::Storage::path($storecfg, $volid);
+ my $path = PVE::Storage::path($storecfg, $volid);
- # This is the ONLY user of the PBS_ env vars set on top of this function!
- my $pbs_restore_cmd = [
- '/usr/bin/pbs-restore',
- '--repository', $repo,
- $pbs_backup_name,
- "$d->{devname}.img.fidx",
- $path,
- '--verbose',
- ];
+ my $pbs_restore_cmd = [
+ '/usr/bin/pbs-restore',
+ '--repository', $repo,
+ $pbs_backup_name,
+ "$d->{devname}.img.fidx",
+ $path,
+ '--verbose',
+ ];
- push @$pbs_restore_cmd, '--format', $d->{format} if $d->{format};
- push @$pbs_restore_cmd, '--keyfile', $keyfile if -e $keyfile;
+ push @$pbs_restore_cmd, '--format', $d->{format} if $d->{format};
+ push @$pbs_restore_cmd, '--keyfile', $keyfile if -e $keyfile;
- if (PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $volid)) {
- push @$pbs_restore_cmd, '--skip-zero';
- }
+ if (PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $volid)) {
+ push @$pbs_restore_cmd, '--skip-zero';
+ }
- my $dbg_cmdstring = PVE::Tools::cmd2string($pbs_restore_cmd);
- print "restore proxmox backup image: $dbg_cmdstring\n";
- run_command($pbs_restore_cmd);
+ my $dbg_cmdstring = PVE::Tools::cmd2string($pbs_restore_cmd);
+ print "restore proxmox backup image: $dbg_cmdstring\n";
+ run_command($pbs_restore_cmd);
+ }
}
$fh->seek(0, 0) || die "seek failed - $!\n";
my $cookie = { netcount => 0 };
while (defined(my $line = <$fh>)) {
- $new_conf_raw .= $restore_update_config_line->(
+ $new_conf_raw .= restore_update_config_line(
$cookie,
- $vmid,
$map,
$line,
$options->{unique},
};
my $err = $@;
- $restore_deactivate_volumes->($storecfg, $devinfo);
+ if ($err || !$options->{live}) {
+ $restore_deactivate_volumes->($storecfg, $devinfo);
+ }
rmtree $tmpdir;
die $err;
}
+ if ($options->{live}) {
+ # keep lock during live-restore
+ $new_conf_raw .= "\nlock: create";
+ }
+
PVE::Tools::file_set_contents($conffile, $new_conf_raw);
PVE::Cluster::cfs_update(); # make sure we read new file
eval { rescan($vmid, 1); };
warn $@ if $@;
+
+ PVE::AccessControl::add_vm_to_pool($vmid, $options->{pool}) if $options->{pool};
+
+ if ($options->{live}) {
+ # enable interrupts
+ local $SIG{INT} =
+ local $SIG{TERM} =
+ local $SIG{QUIT} =
+ local $SIG{HUP} =
+ local $SIG{PIPE} = sub { die "got signal ($!) - abort\n"; };
+
+ my $conf = PVE::QemuConfig->load_config($vmid);
+ die "cannot do live-restore for template\n" if PVE::QemuConfig->is_template($conf);
+
+ pbs_live_restore($vmid, $conf, $storecfg, $devinfo, $repo, $keyfile, $pbs_backup_name);
+
+ PVE::QemuConfig->remove_lock($vmid, "create");
+ }
+}
+
+# Start a restored VM directly on top of its Proxmox Backup Server snapshot
+# and stream the disk contents into the target volumes while it runs.
+#
+# The VM is started paused with a 'pbs-backing' entry for every restored
+# disk, one 'block-stream' job per disk is started, the guest is resumed and
+# the jobs are awaited. Afterwards the '<device>-pbs' block nodes are deleted.
+#
+# Parameters:
+#   $vmid           - ID of the VM to start
+#   $conf           - VM configuration, handed to vm_start_nolock()
+#   $storecfg       - storage configuration
+#   $restored_disks - hash keyed by device name, each of the form
+#                     'drive-<config key>'
+#   $repo           - PBS repository
+#   $keyfile        - path of the key file, only passed on if it exists
+#   $snap           - PBS snapshot to restore from
+#
+# Dies before anything is started if a device name does not have the
+# expected form. If starting or streaming fails, the error is printed as a
+# warning, _do_vm_stop() is called and the function dies with
+# "live-restore failed".
+sub pbs_live_restore {
+    my ($vmid, $conf, $storecfg, $restored_disks, $repo, $keyfile, $snap) = @_;
+
+    print "Starting VM for live-restore\n";
+
+    # the same key file is used for all drives, so check for it only once
+    my $has_keyfile = -e $keyfile;
+
+    my $pbs_backing = {};
+    for my $ds (keys %$restored_disks) {
+        # capture in list context and verify the match - $1 of an unchecked
+        # match would silently keep the value of an earlier successful match
+        my ($drive_name) = $ds =~ m/^drive-(.*)$/
+            or die "unexpected device name '$ds' - cannot set up live-restore\n";
+
+        $pbs_backing->{$drive_name} = {
+            repository => $repo,
+            snapshot => $snap,
+            archive => "$ds.img.fidx",
+        };
+        $pbs_backing->{$drive_name}->{keyfile} = $keyfile if $has_keyfile;
+    }
+
+    my $drives_streamed = 0;
+    eval {
+        # make sure HA doesn't interrupt our restore by stopping the VM
+        if (PVE::HA::Config::vm_is_ha_managed($vmid)) {
+            run_command(['ha-manager', 'set', "vm:$vmid", '--state', 'started']);
+        }
+
+        # start VM with backing chain pointing to PBS backup, environment vars for PBS driver
+        # in QEMU (PBS_PASSWORD and PBS_FINGERPRINT) are already set by our caller
+        vm_start_nolock($storecfg, $vmid, $conf, {paused => 1, 'pbs-backing' => $pbs_backing}, {});
+
+        my $qmeventd_fd = register_qmeventd_handle($vmid);
+
+        # begin streaming, i.e. data copy from PBS to target disk for every vol,
+        # this will effectively collapse the backing image chain consisting of
+        # [target <- alloc-track -> PBS snapshot] to just [target] (alloc-track
+        # removes itself once all backing images vanish with 'auto-remove=on')
+        my $jobs = {};
+        for my $ds (sort keys %$restored_disks) {
+            my $job_id = "restore-$ds";
+            mon_cmd($vmid, 'block-stream',
+                'job-id' => $job_id,
+                device => "$ds",
+            );
+            $jobs->{$job_id} = {};
+        }
+
+        mon_cmd($vmid, 'cont');
+        qemu_drive_mirror_monitor($vmid, undef, $jobs, 'auto', 0, 'stream');
+
+        print "restore-drive jobs finished successfully, removing all tracking block devices"
+            ." to disconnect from Proxmox Backup Server\n";
+
+        for my $ds (sort keys %$restored_disks) {
+            mon_cmd($vmid, 'blockdev-del', 'node-name' => "$ds-pbs");
+        }
+
+        close($qmeventd_fd);
+    };
+
+    my $err = $@;
+
+    if ($err) {
+        warn "An error occurred during live-restore: $err\n";
+        _do_vm_stop($storecfg, $vmid, 1, 1, 10, 0, 1);
+        die "live-restore failed\n";
+    }
}
sub restore_vma_archive {
my $cookie = { netcount => 0 };
while (defined(my $line = <$fh>)) {
- $new_conf_raw .= $restore_update_config_line->(
+ $new_conf_raw .= restore_update_config_line(
$cookie,
- $vmid,
$map,
$line,
$opts->{unique},
eval { rescan($vmid, 1); };
warn $@ if $@;
+
+ PVE::AccessControl::add_vm_to_pool($vmid, $opts->{pool}) if $opts->{pool};
}
sub restore_tar_archive {
my $cookie = { netcount => 0 };
while (defined (my $line = <$srcfd>)) {
- $new_conf_raw .= $restore_update_config_line->(
+ $new_conf_raw .= restore_update_config_line(
$cookie,
- $vmid,
$map,
$line,
$opts->{unique},
if($line =~ m/\((\S+)\/100\%\)/){
my $percent = $1;
my $transferred = int($size * $percent / 100);
- my $remaining = $size - $transferred;
+ my $total_h = render_bytes($size, 1);
+ my $transferred_h = render_bytes($transferred, 1);
- print "transferred: $transferred bytes remaining: $remaining bytes total: $size bytes progression: $percent %\n";
+ # terminate the message with a newline like the line it replaces did, so
+ # that every progress update ends up on a line of its own
+ print "transferred $transferred_h of $total_h ($percent%)\n";
}
};
# 'complete': wait until all jobs are ready, block-job-complete them (default)
# 'cancel': wait until all jobs are ready, block-job-cancel them
# 'skip': wait until all jobs are ready, return with block jobs in ready state
+# 'auto': wait until all jobs disappear, only use for jobs which complete automatically
sub qemu_drive_mirror_monitor {
- my ($vmid, $vmiddst, $jobs, $completion, $qga) = @_;
+ my ($vmid, $vmiddst, $jobs, $completion, $qga, $op) = @_;
$completion //= 'complete';
+ $op //= "mirror";
eval {
my $err_complete = 0;
+ my $starttime = time ();
while (1) {
- die "storage migration timed out\n" if $err_complete > 300;
+ die "block job ('$op') timed out\n" if $err_complete > 300;
my $stats = mon_cmd($vmid, "query-block-jobs");
+ my $ctime = time();
- my $running_mirror_jobs = {};
- foreach my $stat (@$stats) {
- next if $stat->{type} ne 'mirror';
- $running_mirror_jobs->{$stat->{device}} = $stat;
+ my $running_jobs = {};
+ for my $stat (@$stats) {
+ next if $stat->{type} ne $op;
+ $running_jobs->{$stat->{device}} = $stat;
}
my $readycounter = 0;
- foreach my $job (keys %$jobs) {
+ for my $job_id (sort keys %$jobs) {
+ my $job = $running_jobs->{$job_id};
- if(defined($jobs->{$job}->{complete}) && !defined($running_mirror_jobs->{$job})) {
- print "$job : finished\n";
- delete $jobs->{$job};
+ my $vanished = !defined($job);
+ my $complete = defined($jobs->{$job_id}->{complete}) && $vanished;
+ if($complete || ($vanished && $completion eq 'auto')) {
+ print "$job_id: $op-job finished\n";
+ delete $jobs->{$job_id};
next;
}
- die "$job: mirroring has been cancelled\n" if !defined($running_mirror_jobs->{$job});
+ die "$job_id: '$op' has been cancelled\n" if !defined($job);
- my $busy = $running_mirror_jobs->{$job}->{busy};
- my $ready = $running_mirror_jobs->{$job}->{ready};
- if (my $total = $running_mirror_jobs->{$job}->{len}) {
- my $transferred = $running_mirror_jobs->{$job}->{offset} || 0;
+ my $busy = $job->{busy};
+ my $ready = $job->{ready};
+ if (my $total = $job->{len}) {
+ my $transferred = $job->{offset} || 0;
my $remaining = $total - $transferred;
my $percent = sprintf "%.2f", ($transferred * 100 / $total);
- print "$job: transferred: $transferred bytes remaining: $remaining bytes total: $total bytes progression: $percent % busy: $busy ready: $ready \n";
+ my $duration = $ctime - $starttime;
+ my $total_h = render_bytes($total, 1);
+ my $transferred_h = render_bytes($transferred, 1);
+
+ my $status = sprintf(
+ "transferred $transferred_h of $total_h ($percent%%) in %s",
+ render_duration($duration),
+ );
+
+ if ($ready) {
+ if ($busy) {
+ $status .= ", still busy"; # shouldn't even happen? but mirror is weird
+ } else {
+ $status .= ", ready";
+ }
+ }
+ print "$job_id: $status\n" if !$jobs->{$job_id}->{ready};
+ $jobs->{$job_id}->{ready} = $ready;
}
- $readycounter++ if $running_mirror_jobs->{$job}->{ready};
+ $readycounter++ if $job->{ready};
}
last if scalar(keys %$jobs) == 0;
if ($readycounter == scalar(keys %$jobs)) {
- print "all mirroring jobs are ready \n";
- last if $completion eq 'skip'; #do the complete later
+ print "all '$op' jobs are ready\n";
+
+ # do the complete later (or has already been done)
+ last if $completion eq 'skip' || $completion eq 'auto';
if ($vmiddst && $vmiddst != $vmid) {
my $agent_running = $qga && qga_check_running($vmid);
last;
} else {
- foreach my $job (keys %$jobs) {
+ for my $job_id (sort keys %$jobs) {
# try to switch the disk if source and destination are on the same guest
- print "$job: Completing block job...\n";
+ print "$job_id: Completing block job_id...\n";
my $op;
if ($completion eq 'complete') {
} else {
die "invalid completion value: $completion\n";
}
- eval { mon_cmd($vmid, $op, device => $job) };
+ eval { mon_cmd($vmid, $op, device => $job_id) };
if ($@ =~ m/cannot be completed/) {
- print "$job: Block job cannot be completed, try again.\n";
+ print "$job_id: block job cannot be completed, trying again.\n";
$err_complete++;
}else {
- print "$job: Completed successfully.\n";
- $jobs->{$job}->{complete} = 1;
+ print "$job_id: Completed successfully.\n";
+ $jobs->{$job_id}->{complete} = 1;
}
}
}
if ($err) {
eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
- die "mirroring error: $err";
+ die "block job ($op) error: $err";
}
-
}
sub qemu_blockjobs_cancel {
return $bootorder;
}
+# Connect to the qmeventd socket and send it a handshake for $vmid.
+#
+# At most five non-blocking connection attempts are made, with a
+# usleep(25000) pause in between. Only EINTR and EAGAIN lead to another
+# attempt, every other error dies immediately, as does running out of
+# attempts. On success a JSON encoded 'vzdump' message carrying the VMID is
+# written to the socket and the open handle is returned to the caller.
+sub register_qmeventd_handle {
+    my ($vmid) = @_;
+
+    my $socket_path = "/var/run/qmeventd.sock";
+    my $max_attempts = 5;
+
+    my $fh;
+    for my $attempt (1 .. $max_attempts) {
+        $fh = IO::Socket::UNIX->new(Peer => $socket_path, Blocking => 0, Timeout => 1);
+        last if $fh;
+
+        # only an interrupted or would-block attempt is worth retrying
+        unless ($! == EINTR || $! == EAGAIN) {
+            die "unable to connect to qmeventd socket (vmid: $vmid) - $!\n";
+        }
+        if ($attempt >= $max_attempts) {
+            die "unable to connect to qmeventd socket (vmid: $vmid) - timeout after $attempt retries\n";
+        }
+
+        usleep(25000);
+    }
+
+    # send handshake to mark VM as backing up
+    print {$fh} to_json({vzdump => {vmid => "$vmid"}});
+
+    # return handle to be closed later when inhibit is no longer required
+    return $fh;
+}
+
# bash completion helper
sub complete_backup_archives {