1 package PVE
::QemuMigrate
;
5 use PVE
::AbstractMigrate
;
13 use base
qw(PVE::AbstractMigrate);
# fork_command_pipe: launch the command list @$cmd with open2() and hand the
# resulting pipe back to the caller.
# Returns a hash ref with the keys 'writer', 'reader' and 'pid': the two
# IO::File handles given to open2() and the pid it returned.
# NOTE(review): this listing omits parts of the sub (argument unpacking, the
# assignment of $orig_pid, the handling of the eval error) - confirm those
# against the full file.
15 sub fork_command_pipe
{
18 my $reader = IO
::File-
>new();
19 my $writer = IO
::File-
>new();
25 eval { $cpid = open2
($reader, $writer, @$cmd); };
# The current pid differing from $orig_pid means this code is running in
# another process than the one that recorded $orig_pid - presumably the
# forked child after a failed exec; TODO confirm.
30 if ($orig_pid != $$) {
31 logmsg
('err', "can't fork command pipe\n");
38 return { writer
=> $writer, reader
=> $reader, pid
=> $cpid };
# finish_command_pipe: shut down a command pipe described by $cmdpipe, a hash
# ref with the keys 'writer', 'reader' and 'pid' (the shape returned by
# fork_command_pipe).
# NOTE(review): only part of the sub is visible in this listing; what is done
# with the two handles is not shown - confirm against the full file.
41 sub finish_command_pipe
{
44 my $writer = $cmdpipe->{writer
};
45 my $reader = $cmdpipe->{reader
};
50 my $cpid = $cmdpipe->{pid
};
# kill(0, ...) only probes whether the process can be signalled; signal 15
# (SIGTERM) is sent only when that probe succeeds.
52 kill(15, $cpid) if kill(0, $cpid);
# run_with_timeout: call $code with @param while an alarm of $timeout seconds
# is armed.
#   $timeout - seconds; a value <= 0 dies immediately with "got timeout"
#   $code    - code ref to run
#   @param   - arguments passed through to $code
# SIGALRM and SIGPIPE are handled locally: both handlers count the signal in
# $sigcount and die ("got timeout" / "broken pipe"). Any alarm that was
# pending before the call is re-armed afterwards.
# NOTE(review): the eval around the call, the capture of $err and the return
# value are not visible in this listing - confirm against the full file.
57 sub run_with_timeout
{
58 my ($timeout, $code, @param) = @_;
60 die "got timeout\n" if $timeout <= 0;
69 local $SIG{ALRM
} = sub { $sigcount++; die "got timeout\n"; };
70 local $SIG{PIPE
} = sub { $sigcount++; die "broken pipe\n" };
71 local $SIG{__DIE__
}; # see SA bug 4631
# Remember the alarm that was pending so it can be restored below.
73 $prev_alarm = alarm($timeout);
75 $res = &$code(@param);
77 alarm(0); # avoid race conditions
82 alarm($prev_alarm) if defined($prev_alarm);
# A handler ran ($sigcount set) but no error was recorded in $err.
84 die "unknown error" if $sigcount && !$err; # seems to happen sometimes
# NOTE(review): the `sub` line of this fragment is not visible in this
# listing. The parameter list matches the call
# $self->fork_tunnel($self->{nodeip}, $lport, $rport) made elsewhere in this
# file, so this is presumably fork_tunnel - confirm.
# Extends the command in $self->{rem_ssh} with -L "$lport:localhost:$rport",
# starts it through fork_command_pipe() and waits up to 60 seconds for one
# line from its reader handle. An empty reply, a "no quorum" reply and any
# reply other than "tunnel online" each die with their own message.
# The visible tail closes the pipe with finish_command_pipe() and dies with
# "can't open migration tunnel"; the condition guarding that tail and the
# origin of $err are not visible here.
92 my ($self, $nodeip, $lport, $rport) = @_;
94 my $cmd = [@{$self->{rem_ssh
}}, '-L', "$lport:localhost:$rport",
97 my $tunnel = fork_command_pipe
($cmd);
99 my $reader = $tunnel->{reader
};
103 run_with_timeout
(60, sub { $helo = <$reader>; });
104 die "no reply\n" if !$helo;
105 die "no quorum on target node\n" if $helo =~ m/^no quorum$/;
106 die "got strange reply from mtunnel ('$helo')\n"
107 if $helo !~ m/^tunnel online$/;
112 finish_command_pipe
($tunnel);
113 die "can't open migration tunnel - $err";
# NOTE(review): the `sub` line of this fragment is not visible in this
# listing. It is presumably finish_tunnel, which is called elsewhere in this
# file as finish_tunnel($self, $self->{tunnel}) - confirm.
# Writes "quit" to the tunnel's writer handle under a 30 second timeout and
# then calls finish_command_pipe() on the tunnel. Whether the write is wrapped
# in an eval is not visible here.
119 my ($self, $tunnel) = @_;
121 my $writer = $tunnel->{writer
};
124 run_with_timeout
(30, sub {
125 print $writer "quit\n";
131 finish_command_pipe
($tunnel);
# NOTE(review): the `sub` line of this fragment is not visible in this
# listing; presumably this is the lock_vm method - confirm the name.
# Forwards $vmid, $code and @param unchanged to PVE::QemuServer::lock_config()
# and returns whatever that call returns.
137 my ($self, $vmid, $code, @param) = @_;
139 return PVE
::QemuServer
::lock_config
($vmid, $code, @param);
# NOTE(review): the `sub` line of this fragment is not visible in this
# listing; presumably this is the prepare step of the migration - confirm.
# Visible steps, in order:
#   - load the storage configuration into $self->{storecfg} and the VM
#     configuration into $self->{vmconf}, then call check_lock() on it
#   - if check_running() reports a pid, die unless the 'online' option is set
#   - if check_local_resources() reports something, die when $self->{running}
#     is set or the 'force' option is not; the info message about migrating
#     with local devices belongs to the remaining case
#   - activate all volumes returned by get_vm_volumes()
#   - run /bin/true through the $self->{rem_ssh} command and die if that fails
# Where $self->{running} gets set and what the fragment returns is not
# visible here.
143 my ($self, $vmid) = @_;
145 my $online = $self->{opts
}->{online
};
147 $self->{storecfg
} = PVE
::Storage
::config
();
150 my $conf = $self->{vmconf
} = PVE
::QemuServer
::load_config
($vmid);
152 PVE
::QemuServer
::check_lock
($conf);
155 if (my $pid = PVE
::QemuServer
::check_running
($vmid)) {
156 die "cant migrate running VM without --online\n" if !$online;
160 if (my $loc_res = PVE
::QemuServer
::check_local_resources
($conf, 1)) {
161 if ($self->{running
} || !$self->{opts
}->{force
}) {
162 die "can't migrate VM which uses local devices\n";
164 $self->log('info', "migrating VM which uses local devices");
169 my $vollist = PVE
::QemuServer
::get_vm_volumes
($conf);
170 PVE
::Storage
::activate_volumes
($self->{storecfg
}, $vollist);
172 # fixme: check if storage is available on both nodes
174 # test ssh connection
175 my $cmd = [ @{$self->{rem_ssh
}}, '/bin/true' ];
176 eval { $self->cmd_quiet($cmd); };
177 die "Can't connect to destination address using public key\n" if $@;
# NOTE(review): the `sub` line of this fragment is not visible in this
# listing. It is presumably sync_disks, which is called elsewhere in this
# file as sync_disks($self, $vmid) - confirm.
# Collects the volumes that have to be copied into $volhash and copies them
# to $self->{nodeip}:
#   1. every volume PVE::Storage::vdisk_list() reports for $vmid, skipping
#      storages flagged 'shared'
#   2. every drive in the VM configuration, again skipping shared storages
# For both sources storage_check_node() is called once without a node
# argument and once with $self->{node}.
# Drives are rejected (die) when the volume id starts with '/', when a cdrom
# drive uses the literal 'cdrom', when a cdrom image is on a non-shared
# storage, or when the volume has no owner or one different from
# $self->{vmid}. A cdrom drive set to 'none' is skipped.
# Before copying, every collected volume must be on a storage of type 'dir'.
# Each volume id is pushed to $self->{volumes} just before storage_migrate()
# is called for it.
# NOTE(review): where $sharedvm is set, and the eval that produces the final
# $@ check, are not visible here.
183 my ($self, $vmid) = @_;
185 $self->log('info', "copying disk images");
187 my $conf = $self->{vmconf
};
189 $self->{volumes
} = [];
198 # get list from PVE::Storage (for unused volumes)
199 my $dl = PVE
::Storage
::vdisk_list
($self->{storecfg
}, undef, $vmid);
200 PVE
::Storage
::foreach_volid
($dl, sub {
201 my ($volid, $sid, $volname) = @_;
203 # check if storage is available on both nodes
204 my $scfg = PVE
::Storage
::storage_check_node
($self->{storecfg
}, $sid);
205 PVE
::Storage
::storage_check_node
($self->{storecfg
}, $sid, $self->{node
});
207 return if $scfg->{shared
};
209 $volhash->{$volid} = 1;
212 # and add used,owned/non-shared disks (just to be sure we have all)
215 PVE
::QemuServer
::foreach_drive
($conf, sub {
216 my ($ds, $drive) = @_;
218 my $volid = $drive->{file
};
221 die "cant migrate local file/device '$volid'\n" if $volid =~ m
|^/|;
223 if (PVE
::QemuServer
::drive_is_cdrom
($drive)) {
224 die "cant migrate local cdrom drive\n" if $volid eq 'cdrom';
225 return if $volid eq 'none';
226 $cdromhash->{$volid} = 1;
229 my ($sid, $volname) = PVE
::Storage
::parse_volume_id
($volid);
231 # check if storage is available on both nodes
232 my $scfg = PVE
::Storage
::storage_check_node
($self->{storecfg
}, $sid);
233 PVE
::Storage
::storage_check_node
($self->{storecfg
}, $sid, $self->{node
});
235 return if $scfg->{shared
};
237 die "can't migrate local cdrom '$volid'\n" if $cdromhash->{$volid};
241 my ($path, $owner) = PVE
::Storage
::path
($self->{storecfg
}, $volid);
243 die "can't migrate volume '$volid' - owned by other VM (owner = VM $owner)\n"
244 if !$owner || ($owner != $self->{vmid
});
246 $volhash->{$volid} = 1;
249 if ($self->{running
} && !$sharedvm) {
250 die "can't do online migration - VM uses local disks\n";
253 # do some checks first
254 foreach my $volid (keys %$volhash) {
255 my ($sid, $volname) = PVE
::Storage
::parse_volume_id
($volid);
256 my $scfg = PVE
::Storage
::storage_config
($self->{storecfg
}, $sid);
258 die "can't migrate '$volid' - storagy type '$scfg->{type}' not supported\n"
259 if $scfg->{type
} ne 'dir';
262 foreach my $volid (keys %$volhash) {
263 my ($sid, $volname) = PVE
::Storage
::parse_volume_id
($volid);
264 push @{$self->{volumes
}}, $volid;
265 PVE
::Storage
::storage_migrate
($self->{storecfg
}, $volid, $self->{nodeip
}, $sid);
268 die "Failed to sync data - $@" if $@;
# NOTE(review): the `sub` line of this fragment is not visible in this
# listing; presumably this is phase1 of the migration - confirm.
# Visible steps, in order: log the start of the migration, set
# lock => 'migrate' in the VM configuration via change_config_nolock(), call
# sync_disks(), then rename() the VM's config file to the path
# config_file() returns for $self->{node}. A failed rename dies with the
# system error text.
272 my ($self, $vmid) = @_;
274 $self->log('info', "starting migration of VM $vmid to node '$self->{node}' ($self->{nodeip})");
276 my $conf = $self->{vmconf
};
278 # set migrate lock in config file
279 PVE
::QemuServer
::change_config_nolock
($vmid, { lock => 'migrate' }, {}, 1);
281 sync_disks
($self, $vmid);
283 # move config to remote node
284 my $conffile = PVE
::QemuServer
::config_file
($vmid);
285 my $newconffile = PVE
::QemuServer
::config_file
($vmid, $self->{node
});
287 die "Failed to move config to node '$self->{node}' - rename failed: $!\n"
288 if !rename($conffile, $newconffile);
# NOTE(review): the `sub` line of this fragment is not visible in this
# listing; judging by the log message this is presumably phase1_cleanup -
# confirm.
# Removes the 'lock' entry from the VM configuration (inside an eval, so a
# failure there does not abort the cleanup) and logs one 'err' line for every
# volume id recorded in $self->{volumes}. The copies themselves are only
# reported, not removed.
# NOTE(review): the condition around the log of $err is not visible, so it is
# unclear here whether $err is the parameter or a re-declared eval error.
292 my ($self, $vmid, $err) = @_;
294 $self->log('info', "aborting phase 1 - cleanup resources");
296 my $unset = { lock => 1 };
297 eval { PVE
::QemuServer
::change_config_nolock
($vmid, {}, $unset, 1) };
299 $self->log('err', $err);
302 if ($self->{volumes
}) {
303 foreach my $volid (@{$self->{volumes
}}) {
304 $self->log('err', "found stale volume copy '$volid' on node '$self->{node}'");
305 # fixme: try to remove ?
# NOTE(review): the `sub` line of this fragment is not visible in this
# listing; presumably this is phase2 (the live migration itself) - confirm.
# Visible steps, in order:
#   - run 'qm start <vmid> --stateuri tcp --skiplock' through the
#     $self->{rem_ssh} command and scan its output for the line announcing the
#     migration port; die if no remote port was detected
#   - take a local port from next_migrate_port() and open a tunnel with
#     fork_tunnel(), stored in $self->{tunnel}
#   - send the monitor command 'migrate -d' for tcp:localhost:<local port>
#   - repeatedly query 'info migrate' and parse the status line
#     (active, completed, failed or cancelled); while active, log the
#     transferred / remaining / total RAM figures when the output changed
#   - on 'completed', log a speed computed as $conf->{memory} divided by the
#     elapsed seconds since $start
#   - leave the loop once the status is no longer 'active'; an unparsable
#     status dies
# NOTE(review): the loop construct, the assignments of $rport, $ms, $lstat and
# $start, and the handling of 'failed' / 'cancelled' are not visible here.
311 my ($self, $vmid) = @_;
313 my $conf = $self->{vmconf
};
315 logmsg
('info', "starting VM $vmid on remote node '$self->{node}'");
319 ## start on remote node
320 my $cmd = [@{$self->{rem_ssh
}}, 'qm', 'start',
321 $vmid, '--stateuri', 'tcp', '--skiplock'];
323 $self->cmd($cmd, outfunc
=> sub {
326 if ($line =~ m/^migration listens on port (\d+)$/) {
331 die "unable to detect remote migration port\n" if !$rport;
333 $self->log('info', "starting migration tunnel");
335 ## create tunnel to remote port
336 my $lport = PVE
::QemuServer
::next_migrate_port
();
337 $self->{tunnel
} = $self->fork_tunnel($self->{nodeip
}, $lport, $rport);
339 $self->log('info', "starting online/live migration");
344 PVE
::QemuServer
::vm_monitor_command
($vmid, "migrate -d \"tcp:localhost:$lport\"", 1);
349 my $stat = PVE::QemuServer::vm_monitor_command($vmid, "info migrate
", 1);
350 if ($stat =~ m/^Migration status: (active|completed|failed|cancelled)$/im) {
353 if ($stat ne $lstat) {
354 if ($ms eq 'active') {
355 my ($trans, $rem, $total) = (0, 0, 0);
356 $trans = $1 if $stat =~ m/^transferred ram: (\d+) kbytes$/im;
357 $rem = $1 if $stat =~ m/^remaining ram: (\d+) kbytes$/im;
358 $total = $1 if $stat =~ m/^total ram: (\d+) kbytes$/im;
360 $self->log('info', "migration status
: $ms (transferred
${trans
}KB
, " .
361 "remaining
${rem
}KB
), total
${total
}KB
)");
363 $self->log('info', "migration status
: $ms");
367 if ($ms eq 'completed') {
368 my $delay = time() - $start;
370 my $mbps = sprintf "%.2f", $conf->{memory}/$delay;
371 $self->log('info', "migration speed
: $mbps MB
/s
");
375 if ($ms eq 'failed' || $ms eq 'cancelled') {
379 last if $ms ne 'active';
381 die "unable to parse migration status
'$stat' - aborting
\n";
# NOTE(review): the `sub` line of this fragment is not visible in this
# listing; presumably this is phase3 - confirm.
# Frees every volume id listed in $self->{volumes} with
# PVE::Storage::vdisk_free(), each inside its own eval; a failure is logged
# and the loop stops early when the error text is "interrupted by signal".
# Afterwards, if $self->{tunnel} is set, it is closed with finish_tunnel()
# (also inside an eval).
# NOTE(review): the lines that capture $err from the evals are not visible
# here.
388 my ($self, $vmid) = @_;
390 my $volids = $self->{volumes};
392 # destroy local copies
393 foreach my $volid (@$volids) {
394 eval { PVE::Storage::vdisk_free($self->{storecfg}, $volid); };
396 $self->log('err', "removing
local copy of
'$volid' failed
- $err");
398 last if $err =~ /^interrupted by signal$/;
402 if ($self->{tunnel}) {
403 eval { finish_tunnel($self, $self->{tunnel}); };
405 $self->log('err', $err);
# NOTE(review): the `sub` line of this fragment is not visible in this
# listing; presumably this is phase3_cleanup - confirm.
# Visible steps, in order: stop the local VM with vm_stop() inside an eval,
# deactivate all volumes returned by get_vm_volumes() for the VM
# configuration, then run 'qm unlock <vmid>' through the $self->{rem_ssh}
# command using cmd_logerr().
# NOTE(review): the conditions around the two 'err' log calls, and whether
# the deactivation is wrapped in an eval, are not visible here.
412 my ($self, $vmid, $err) = @_;
414 my $conf = $self->{vmconf};
416 # always stop local VM
417 eval { PVE::QemuServer::vm_stop($self->{storecfg}, $vmid, 1, 1); };
419 $self->log('err', "stopping vm failed
- $err");
423 # always deactivate volumes - avoid lvm LVs to be active on several nodes
425 my $vollist = PVE::QemuServer::get_vm_volumes($conf);
426 PVE::Storage::deactivate_volumes($self->{storecfg}, $vollist);
429 $self->log('err', $err);
434 my $cmd = [ @{$self->{rem_ssh}}, 'qm', 'unlock', $vmid ];
435 $self->cmd_logerr($cmd, errmsg => "failed to clear migrate
lock");
439 my ($self, $vmid) = @_;