api: clone: fork before locking
author     Fabian Ebner <f.ebner@proxmox.com>
           Thu, 27 Jan 2022 14:01:53 +0000 (15:01 +0100)
committer  Fabian Grünbichler <f.gruenbichler@proxmox.com>
           Mon, 31 Jan 2022 12:10:33 +0000 (13:10 +0100)
This uses the familiar early+repeated checks pattern from other API calls:
the checks run once early, before forking and locking, so obvious errors
fail fast, and are repeated under the lock before any changes are made.
The only intended functional changes are with regard to locking/forking.
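
In code, the pattern boils down to roughly the following (a minimal
sketch using the names from the diff below, not the literal
implementation):

    # all checks live in one closure, so they can run twice
    my $load_and_check = sub {
        my $conf = PVE::QemuConfig->load_config($vmid);
        PVE::QemuConfig->check_lock($conf);
        # ... all further parameter and permission checks ...
        return $conf;
    };

    my $clonefn = sub {
        # repeat the checks under the lock - the config may have
        # changed between the early check and lock acquisition
        my $conf = $load_and_check->();
        # ... do the actual work ...
    };

    $load_and_check->(); # early checks - fail fast, before forking/locking

    return $rpcenv->fork_worker('qmclone', $vmid, $authuser, sub {
        PVE::QemuConfig->lock_config_full($vmid, 1, $clonefn);
    });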

For a full clone of a running VM without a guest agent, this also fixes
the vm_{suspend,resume} calls issued for drive-mirror completion.
Previously, those calls just timed out, because they could not acquire
the lock:

> create full clone of drive scsi0 (rbdkvm:vm-104-disk-0)
> Formatting '/var/lib/vz/images/105/vm-105-disk-0.raw', fmt=raw
> size=4294967296 preallocation=off
> drive mirror is starting for drive-scsi0
> drive-scsi0: transferred 2.0 MiB of 4.0 GiB (0.05%) in 0s
> drive-scsi0: transferred 635.0 MiB of 4.0 GiB (15.50%) in 1s
> drive-scsi0: transferred 1.6 GiB of 4.0 GiB (40.50%) in 2s
> drive-scsi0: transferred 3.6 GiB of 4.0 GiB (90.23%) in 3s
> drive-scsi0: transferred 4.0 GiB of 4.0 GiB (100.00%) in 4s, ready
> all 'mirror' jobs are ready
> suspend vm
> trying to acquire lock...
> can't lock file '/var/lock/qemu-server/lock-104.conf' - got timeout
> drive-scsi0: Cancelling block job
> drive-scsi0: Done.
> resume vm
> trying to acquire lock...
> can't lock file '/var/lock/qemu-server/lock-104.conf' - got timeout
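
The fix is essentially a reordering of fork and lock: previously the
lock was taken in the API process and the worker forked inside the
locked section, so the vm_{suspend,resume} calls issued by the worker
during drive-mirror completion timed out on that very lock. A
simplified before/after sketch (not the literal diff):

    # before: lock, then fork the worker inside the locked section
    PVE::QemuConfig->lock_config_full($vmid, 1, sub {
        return $rpcenv->fork_worker('qmclone', $vmid, $authuser, $realcmd);
    });

    # after: fork the worker first, then lock inside the worker process,
    # so drive-mirror completion can suspend/resume the VM
    return $rpcenv->fork_worker('qmclone', $vmid, $authuser, sub {
        PVE::QemuConfig->lock_config_full($vmid, 1, $clonefn);
    });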

Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
PVE/API2/Qemu.pm

index 6992f6f766af48cc51d04ac408895d01d9f9594a..38e08c887c1865d56c473d7e433bf6531225ee99 100644
@@ -3079,9 +3079,7 @@ __PACKAGE__->register_method({
 
        my $running = PVE::QemuServer::check_running($vmid) || 0;
 
-       my $clonefn = sub {
-           # do all tests after lock but before forking worker - if possible
-
+       my $load_and_check = sub {
            my $conf = PVE::QemuConfig->load_config($vmid);
            PVE::QemuConfig->check_lock($conf);
 
@@ -3091,7 +3089,7 @@ __PACKAGE__->register_method({
            die "snapshot '$snapname' does not exist\n"
                if $snapname && !defined( $conf->{snapshots}->{$snapname});
 
-           my $full = extract_param($param, 'full') // !PVE::QemuConfig->is_template($conf);
+           my $full = $param->{full} // !PVE::QemuConfig->is_template($conf);
 
            die "parameter 'storage' not allowed for linked clones\n"
                if defined($storage) && !$full;
@@ -3156,7 +3154,13 @@ __PACKAGE__->register_method({
                }
            }
 
-            # auto generate a new uuid
+           return ($conffile, $newconf, $oldconf, $vollist, $drives, $fullclone);
+       };
+
+       my $clonefn = sub {
+           my ($conffile, $newconf, $oldconf, $vollist, $drives, $fullclone) = $load_and_check->();
+
+           # auto generate a new uuid
            my $smbios1 = PVE::QemuServer::parse_smbios1($newconf->{smbios1} || '');
            $smbios1->{uuid} = PVE::QemuServer::generate_uuid();
            $newconf->{smbios1} = PVE::QemuServer::print_smbios1($smbios1);
@@ -3181,105 +3185,99 @@ __PACKAGE__->register_method({
            # FIXME use PVE::QemuConfig->create_and_lock_config and adapt code
            PVE::Tools::file_set_contents($conffile, "# qmclone temporary file\nlock: clone\n");
 
-           my $realcmd = sub {
-               my $upid = shift;
-
-               my $newvollist = [];
-               my $jobs = {};
-
-               eval {
-                   local $SIG{INT} =
-                       local $SIG{TERM} =
-                       local $SIG{QUIT} =
-                       local $SIG{HUP} = sub { die "interrupted by signal\n"; };
+           PVE::Firewall::clone_vmfw_conf($vmid, $newid);
 
-                   PVE::Storage::activate_volumes($storecfg, $vollist, $snapname);
+           my $newvollist = [];
+           my $jobs = {};
 
-                   my $bwlimit = extract_param($param, 'bwlimit');
+           eval {
+               local $SIG{INT} =
+                   local $SIG{TERM} =
+                   local $SIG{QUIT} =
+                   local $SIG{HUP} = sub { die "interrupted by signal\n"; };
 
-                   my $total_jobs = scalar(keys %{$drives});
-                   my $i = 1;
+               PVE::Storage::activate_volumes($storecfg, $vollist, $snapname);
 
-                   foreach my $opt (sort keys %$drives) {
-                       my $drive = $drives->{$opt};
-                       my $skipcomplete = ($total_jobs != $i); # finish after last drive
-                       my $completion = $skipcomplete ? 'skip' : 'complete';
+               my $bwlimit = extract_param($param, 'bwlimit');
 
-                       my $src_sid = PVE::Storage::parse_volume_id($drive->{file});
-                       my $storage_list = [ $src_sid ];
-                       push @$storage_list, $storage if defined($storage);
-                       my $clonelimit = PVE::Storage::get_bandwidth_limit('clone', $storage_list, $bwlimit);
-
-                       my $newdrive = PVE::QemuServer::clone_disk(
-                           $storecfg,
-                           $vmid,
-                           $running,
-                           $opt,
-                           $drive,
-                           $snapname,
-                           $newid,
-                           $storage,
-                           $format,
-                           $fullclone->{$opt},
-                           $newvollist,
-                           $jobs,
-                           $completion,
-                           $oldconf->{agent},
-                           $clonelimit,
-                           $oldconf
-                       );
+               my $total_jobs = scalar(keys %{$drives});
+               my $i = 1;
 
-                       $newconf->{$opt} = PVE::QemuServer::print_drive($newdrive);
+               foreach my $opt (sort keys %$drives) {
+                   my $drive = $drives->{$opt};
+                   my $skipcomplete = ($total_jobs != $i); # finish after last drive
+                   my $completion = $skipcomplete ? 'skip' : 'complete';
 
-                       PVE::QemuConfig->write_config($newid, $newconf);
-                       $i++;
-                   }
+                   my $src_sid = PVE::Storage::parse_volume_id($drive->{file});
+                   my $storage_list = [ $src_sid ];
+                   push @$storage_list, $storage if defined($storage);
+                   my $clonelimit = PVE::Storage::get_bandwidth_limit('clone', $storage_list, $bwlimit);
 
-                   delete $newconf->{lock};
+                   my $newdrive = PVE::QemuServer::clone_disk(
+                       $storecfg,
+                       $vmid,
+                       $running,
+                       $opt,
+                       $drive,
+                       $snapname,
+                       $newid,
+                       $storage,
+                       $format,
+                       $fullclone->{$opt},
+                       $newvollist,
+                       $jobs,
+                       $completion,
+                       $oldconf->{agent},
+                       $clonelimit,
+                       $oldconf
+                   );
 
-                   # do not write pending changes
-                   if (my @changes = keys %{$newconf->{pending}}) {
-                       my $pending = join(',', @changes);
-                       warn "found pending changes for '$pending', discarding for clone\n";
-                       delete $newconf->{pending};
-                   }
+                   $newconf->{$opt} = PVE::QemuServer::print_drive($newdrive);
 
                    PVE::QemuConfig->write_config($newid, $newconf);
+                   $i++;
+               }
 
-                    if ($target) {
-                       # always deactivate volumes - avoid lvm LVs to be active on several nodes
-                       PVE::Storage::deactivate_volumes($storecfg, $vollist, $snapname) if !$running;
-                       PVE::Storage::deactivate_volumes($storecfg, $newvollist);
+               delete $newconf->{lock};
 
-                       my $newconffile = PVE::QemuConfig->config_file($newid, $target);
-                       die "Failed to move config to node '$target' - rename failed: $!\n"
-                           if !rename($conffile, $newconffile);
-                   }
+               # do not write pending changes
+               if (my @changes = keys %{$newconf->{pending}}) {
+                   my $pending = join(',', @changes);
+                   warn "found pending changes for '$pending', discarding for clone\n";
+                   delete $newconf->{pending};
+               }
 
-                   PVE::AccessControl::add_vm_to_pool($newid, $pool) if $pool;
-               };
-               if (my $err = $@) {
-                   eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
-                   sleep 1; # some storage like rbd need to wait before release volume - really?
+               PVE::QemuConfig->write_config($newid, $newconf);
 
-                   foreach my $volid (@$newvollist) {
-                       eval { PVE::Storage::vdisk_free($storecfg, $volid); };
-                       warn $@ if $@;
-                   }
+               if ($target) {
+                   # always deactivate volumes - avoid lvm LVs to be active on several nodes
+                   PVE::Storage::deactivate_volumes($storecfg, $vollist, $snapname) if !$running;
+                   PVE::Storage::deactivate_volumes($storecfg, $newvollist);
 
-                   PVE::Firewall::remove_vmfw_conf($newid);
+                   my $newconffile = PVE::QemuConfig->config_file($newid, $target);
+                   die "Failed to move config to node '$target' - rename failed: $!\n"
+                       if !rename($conffile, $newconffile);
+               }
 
-                   unlink $conffile; # avoid races -> last thing before die
+               PVE::AccessControl::add_vm_to_pool($newid, $pool) if $pool;
+           };
+           if (my $err = $@) {
+               eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
+               sleep 1; # some storage like rbd need to wait before release volume - really?
 
-                   die "clone failed: $err";
+               foreach my $volid (@$newvollist) {
+                   eval { PVE::Storage::vdisk_free($storecfg, $volid); };
+                   warn $@ if $@;
                }
 
-               return;
-           };
+               PVE::Firewall::remove_vmfw_conf($newid);
 
-           PVE::Firewall::clone_vmfw_conf($vmid, $newid);
+               unlink $conffile; # avoid races -> last thing before die
 
-           return $rpcenv->fork_worker('qmclone', $vmid, $authuser, $realcmd);
+               die "clone failed: $err";
+           }
+
+           return;
        };
 
        # Acquire exclusive lock for $newid
@@ -3287,12 +3285,18 @@ __PACKAGE__->register_method({
            return PVE::QemuConfig->lock_config_full($newid, 1, $clonefn);
        };
 
-       # exclusive lock if VM is running - else shared lock is enough;
-       if ($running) {
-           return PVE::QemuConfig->lock_config_full($vmid, 1, $lock_target_vm);
-       } else {
-           return PVE::QemuConfig->lock_config_shared($vmid, 1, $lock_target_vm);
-       }
+       my $lock_source_vm = sub {
+           # exclusive lock if VM is running - else shared lock is enough;
+           if ($running) {
+               return PVE::QemuConfig->lock_config_full($vmid, 1, $lock_target_vm);
+           } else {
+               return PVE::QemuConfig->lock_config_shared($vmid, 1, $lock_target_vm);
+           }
+       };
+
+       $load_and_check->(); # early checks before forking/locking
+
+       return $rpcenv->fork_worker('qmclone', $vmid, $authuser, $lock_source_vm);
     }});
 
 __PACKAGE__->register_method({