]>
git.proxmox.com Git - qemu-server.git/blob - PVE/QemuMigrate.pm
1 package PVE
::QemuMigrate
;
5 use PVE
::AbstractMigrate
;
13 use base
qw(PVE::AbstractMigrate);
15 sub fork_command_pipe
{
16 my ($self, $cmd) = @_;
18 my $reader = IO
::File-
>new();
19 my $writer = IO
::File-
>new();
25 eval { $cpid = open2
($reader, $writer, @$cmd); };
30 if ($orig_pid != $$) {
31 $self->log('err', "can't fork command pipe\n");
38 return { writer
=> $writer, reader
=> $reader, pid
=> $cpid };
41 sub finish_command_pipe
{
42 my ($self, $cmdpipe, $timeout) = @_;
44 my $writer = $cmdpipe->{writer
};
45 my $reader = $cmdpipe->{reader
};
50 my $cpid = $cmdpipe->{pid
};
53 for (my $i = 0; $i < $timeout; $i++) {
54 return if !PVE
::ProcFSTools
::check_process_running
($cpid);
59 $self->log('info', "ssh tunnel still running - terminating now with SIGTERM\n");
63 for (my $i = 0; $i < 10; $i++) {
64 return if !PVE
::ProcFSTools
::check_process_running
($cpid);
68 $self->log('info', "ssh tunnel still running - terminating now with SIGKILL\n");
74 my ($self, $nodeip, $lport, $rport) = @_;
76 my $cmd = [@{$self->{rem_ssh
}}, '-L', "$lport:localhost:$rport",
79 my $tunnel = $self->fork_command_pipe($cmd);
81 my $reader = $tunnel->{reader
};
85 PVE
::Tools
::run_with_timeout
(60, sub { $helo = <$reader>; });
86 die "no reply\n" if !$helo;
87 die "no quorum on target node\n" if $helo =~ m/^no quorum$/;
88 die "got strange reply from mtunnel ('$helo')\n"
89 if $helo !~ m/^tunnel online$/;
94 $self->finish_command_pipe($tunnel);
95 die "can't open migration tunnel - $err";
101 my ($self, $tunnel) = @_;
103 my $writer = $tunnel->{writer
};
106 PVE
::Tools
::run_with_timeout
(30, sub {
107 print $writer "quit\n";
113 $self->finish_command_pipe($tunnel, 30);
119 my ($self, $vmid, $code, @param) = @_;
121 return PVE
::QemuServer
::lock_config
($vmid, $code, @param);
125 my ($self, $vmid) = @_;
127 my $online = $self->{opts
}->{online
};
129 $self->{storecfg
} = PVE
::Storage
::config
();
132 my $conf = $self->{vmconf
} = PVE
::QemuServer
::load_config
($vmid);
134 PVE
::QemuServer
::check_lock
($conf);
137 if (my $pid = PVE
::QemuServer
::check_running
($vmid)) {
138 die "cant migrate running VM without --online\n" if !$online;
142 if (my $loc_res = PVE
::QemuServer
::check_local_resources
($conf, 1)) {
143 if ($self->{running
} || !$self->{opts
}->{force
}) {
144 die "can't migrate VM which uses local devices\n";
146 $self->log('info', "migrating VM which uses local devices");
151 my $vollist = PVE
::QemuServer
::get_vm_volumes
($conf);
152 PVE
::Storage
::activate_volumes
($self->{storecfg
}, $vollist);
154 # fixme: check if storage is available on both nodes
156 # test ssh connection
157 my $cmd = [ @{$self->{rem_ssh
}}, '/bin/true' ];
158 eval { $self->cmd_quiet($cmd); };
159 die "Can't connect to destination address using public key\n" if $@;
165 my ($self, $vmid) = @_;
167 $self->log('info', "copying disk images");
169 my $conf = $self->{vmconf
};
171 $self->{volumes
} = [];
180 my @sids = PVE
::Storage
::storage_ids
($self->{storecfg
});
181 foreach my $storeid (@sids) {
182 my $scfg = PVE
::Storage
::storage_config
($self->{storecfg
}, $storeid);
183 next if $scfg->{shared
};
184 next if !PVE
::Storage
::storage_check_enabled
($self->{storecfg
}, $storeid, undef, 1);
186 # get list from PVE::Storage (for unused volumes)
187 my $dl = PVE
::Storage
::vdisk_list
($self->{storecfg
}, $storeid, $vmid);
188 PVE
::Storage
::foreach_volid
($dl, sub {
189 my ($volid, $sid, $volname) = @_;
191 # check if storage is available on target node
192 PVE
::Storage
::storage_check_node
($self->{storecfg
}, $sid, $self->{node
});
194 $volhash->{$volid} = 1;
198 # and add used, owned/non-shared disks (just to be sure we have them all)
201 PVE
::QemuServer
::foreach_drive
($conf, sub {
202 my ($ds, $drive) = @_;
204 my $volid = $drive->{file
};
207 die "cant migrate local file/device '$volid'\n" if $volid =~ m
|^/|;
209 if (PVE
::QemuServer
::drive_is_cdrom
($drive)) {
210 die "cant migrate local cdrom drive\n" if $volid eq 'cdrom';
211 return if $volid eq 'none';
212 $cdromhash->{$volid} = 1;
215 my ($sid, $volname) = PVE
::Storage
::parse_volume_id
($volid);
217 # check if storage is available on both nodes
218 my $scfg = PVE
::Storage
::storage_check_node
($self->{storecfg
}, $sid);
219 PVE
::Storage
::storage_check_node
($self->{storecfg
}, $sid, $self->{node
});
221 return if $scfg->{shared
};
223 die "can't migrate local cdrom '$volid'\n" if $cdromhash->{$volid};
227 my ($path, $owner) = PVE
::Storage
::path
($self->{storecfg
}, $volid);
229 die "can't migrate volume '$volid' - owned by other VM (owner = VM $owner)\n"
230 if !$owner || ($owner != $self->{vmid
});
232 $volhash->{$volid} = 1;
235 if ($self->{running
} && !$sharedvm) {
236 die "can't do online migration - VM uses local disks\n";
239 # do some checks first
240 foreach my $volid (keys %$volhash) {
241 my ($sid, $volname) = PVE
::Storage
::parse_volume_id
($volid);
242 my $scfg = PVE
::Storage
::storage_config
($self->{storecfg
}, $sid);
244 die "can't migrate '$volid' - storagy type '$scfg->{type}' not supported\n"
245 if $scfg->{type
} ne 'dir';
248 foreach my $volid (keys %$volhash) {
249 my ($sid, $volname) = PVE
::Storage
::parse_volume_id
($volid);
250 push @{$self->{volumes
}}, $volid;
251 PVE
::Storage
::storage_migrate
($self->{storecfg
}, $volid, $self->{nodeip
}, $sid);
254 die "Failed to sync data - $@" if $@;
258 my ($self, $vmid) = @_;
260 $self->log('info', "starting migration of VM $vmid to node '$self->{node}' ($self->{nodeip})");
262 my $conf = $self->{vmconf
};
264 # set migrate lock in config file
265 $conf->{lock} = 'migrate';
266 PVE
::QemuServer
::update_config_nolock
($vmid, $conf, 1);
268 sync_disks
($self, $vmid);
273 my ($self, $vmid, $err) = @_;
275 $self->log('info', "aborting phase 1 - cleanup resources");
277 my $conf = $self->{vmconf
};
278 delete $conf->{lock};
279 eval { PVE
::QemuServer
::update_config_nolock
($vmid, $conf, 1) };
281 $self->log('err', $err);
284 if ($self->{volumes
}) {
285 foreach my $volid (@{$self->{volumes
}}) {
286 $self->log('err', "found stale volume copy '$volid' on node '$self->{node}'");
287 # fixme: try to remove ?
293 my ($self, $vmid) = @_;
295 my $conf = $self->{vmconf
};
297 $self->log('info', "starting VM $vmid on remote node '$self->{node}'");
301 my $nodename = PVE
::INotify
::nodename
();
303 ## start on remote node
304 my $cmd = [@{$self->{rem_ssh
}}, 'qm', 'start',
305 $vmid, '--stateuri', 'tcp', '--skiplock', '--migratedfrom', $nodename];
307 PVE
::Tools
::run_command
($cmd, outfunc
=> sub {
310 if ($line =~ m/^migration listens on port (\d+)$/) {
313 }, errfunc
=> sub {});
315 die "unable to detect remote migration port\n" if !$rport;
317 $self->log('info', "starting migration tunnel");
319 ## create tunnel to remote port
320 my $lport = PVE
::QemuServer
::next_migrate_port
();
321 $self->{tunnel
} = $self->fork_tunnel($self->{nodeip
}, $lport, $rport);
323 $self->log('info', "starting online/live migration on port $lport");
328 PVE
::QemuServer
::vm_mon_cmd_nocheck
($vmid, "migrate", uri
=> "tcp:localhost:$lport");
335 my $stat = PVE
::QemuServer
::vm_mon_cmd_nocheck
($vmid, "query-migrate");
336 if ($stat->{status
} =~ m/^(active|completed|failed|cancelled)$/im) {
339 if ($stat->{status
} eq 'completed') {
340 my $delay = time() - $start;
342 my $mbps = sprintf "%.2f", $conf->{memory
}/$delay;
343 $self->log('info', "migration speed: $mbps MB/s");
347 if ($stat->{status
} eq 'failed' || $stat->{status
} eq 'cancelled') {
351 if ($stat->{status
} ne 'active') {
352 $self->log('info', "migration status: $stat->{status}");
356 if ($stat->{ram
}->{transferred
} ne $lstat) {
357 my $trans = $stat->{ram
}->{transferred
} || 0;
358 my $rem = $stat->{ram
}->{remaining
} || 0;
359 my $total = $stat->{ram
}->{total
} || 0;
361 $self->log('info', "migration status: $stat->{status} (transferred ${trans}, " .
362 "remaining ${rem}), total ${total})");
365 $lstat = $stat->{ram
}->{transferred
};
369 die "unable to parse migration status '$stat->{status}' - aborting\n";
375 my ($self, $vmid, $err) = @_;
377 $self->log('info', "aborting phase 2 - cleanup resources");
379 my $conf = $self->{vmconf
};
380 delete $conf->{lock};
381 eval { PVE
::QemuServer
::update_config_nolock
($vmid, $conf, 1) };
383 $self->log('err', $err);
386 ## fixme : vm_stop_cleanup on target vm
392 my ($self, $vmid) = @_;
394 my $volids = $self->{volumes
};
396 # destroy local copies
397 foreach my $volid (@$volids) {
398 eval { PVE
::Storage
::vdisk_free
($self->{storecfg
}, $volid); };
400 $self->log('err', "removing local copy of '$volid' failed - $err");
402 last if $err =~ /^interrupted by signal$/;
408 my ($self, $vmid, $err) = @_;
410 my $conf = $self->{vmconf
};
412 # move config to remote node
413 my $conffile = PVE
::QemuServer
::config_file
($vmid);
414 my $newconffile = PVE
::QemuServer
::config_file
($vmid, $self->{node
});
416 die "Failed to move config to node '$self->{node}' - rename failed: $!\n"
417 if !rename($conffile, $newconffile);
419 # always stop local VM
420 eval { PVE
::QemuServer
::vm_stop
($self->{storecfg
}, $vmid, 1, 1); };
422 $self->log('err', "stopping vm failed - $err");
426 if ($self->{tunnel
}) {
427 eval { finish_tunnel
($self, $self->{tunnel
}); };
429 $self->log('err', $err);
434 # always deactivate volumes - avoid LVM LVs being active on several nodes
436 my $vollist = PVE
::QemuServer
::get_vm_volumes
($conf);
437 PVE
::Storage
::deactivate_volumes
($self->{storecfg
}, $vollist);
440 $self->log('err', $err);
445 my $cmd = [ @{$self->{rem_ssh
}}, 'qm', 'unlock', $vmid ];
446 $self->cmd_logerr($cmd, errmsg
=> "failed to clear migrate lock");
450 my ($self, $vmid) = @_;