]> git.proxmox.com Git - qemu-server.git/blob - PVE/QemuMigrate.pm
d5ee0351a70299d16cf2f487d47469a1eff0f2fa
[qemu-server.git] / PVE / QemuMigrate.pm
1 package PVE::QemuMigrate;
2
3 use strict;
4 use warnings;
5 use POSIX qw(strftime);
6 use IO::File;
7 use IPC::Open2;
8 use PVE::Tools qw(run_command);
9 use PVE::SafeSyslog;
10 use PVE::INotify;
11 use PVE::Cluster;
12 use PVE::Storage;
13 use PVE::QemuServer;
14
15 my $delayed_interrupt = 0;
16
17 # blowfish is a fast block cipher, much faster then 3des
18 my @ssh_opts = ('-c', 'blowfish', '-o', 'BatchMode=yes');
19 my @ssh_cmd = ('/usr/bin/ssh', @ssh_opts);
20 my @scp_cmd = ('/usr/bin/scp', @ssh_opts);
21 my $qm_cmd = '/usr/sbin/qm';
22
23 sub logmsg {
24 my ($level, $msg) = @_;
25
26 chomp $msg;
27
28 return if !$msg;
29
30 my $tstr = strftime("%b %d %H:%M:%S", localtime);
31
32 syslog($level, $msg);
33
34 foreach my $line (split (/\n/, $msg)) {
35 print STDOUT "$tstr $line\n";
36 }
37 \*STDOUT->flush();
38 }
39
40 sub eval_int {
41 my ($func) = @_;
42
43 eval {
44 local $SIG{INT} = $SIG{TERM} = $SIG{QUIT} = $SIG{HUP} = sub {
45 $delayed_interrupt = 0;
46 logmsg('err', "received interrupt");
47 die "interrupted by signal\n";
48 };
49 local $SIG{PIPE} = sub {
50 $delayed_interrupt = 0;
51 logmsg('err', "received broken pipe interrupt");
52 die "interrupted by signal\n";
53 };
54
55 my $di = $delayed_interrupt;
56 $delayed_interrupt = 0;
57
58 die "interrupted by signal\n" if $di;
59
60 &$func();
61 };
62 }
63
64 sub fork_command_pipe {
65 my ($cmd) = @_;
66
67 my $reader = IO::File->new();
68 my $writer = IO::File->new();
69
70 my $orig_pid = $$;
71
72 my $cpid;
73
74 eval { $cpid = open2($reader, $writer, @$cmd); };
75
76 my $err = $@;
77
78 # catch exec errors
79 if ($orig_pid != $$) {
80 logmsg('err', "can't fork command pipe\n");
81 POSIX::_exit(1);
82 kill('KILL', $$);
83 }
84
85 die $err if $err;
86
87 return { writer => $writer, reader => $reader, pid => $cpid };
88 }
89
90 sub finish_command_pipe {
91 my $cmdpipe = shift;
92
93 my $writer = $cmdpipe->{writer};
94 my $reader = $cmdpipe->{reader};
95
96 $writer->close();
97 $reader->close();
98
99 my $cpid = $cmdpipe->{pid};
100
101 kill(15, $cpid) if kill(0, $cpid);
102
103 waitpid($cpid, 0);
104 }
105
106 sub run_with_timeout {
107 my ($timeout, $code, @param) = @_;
108
109 die "got timeout\n" if $timeout <= 0;
110
111 my $prev_alarm;
112
113 my $sigcount = 0;
114
115 my $res;
116
117 eval {
118 local $SIG{ALRM} = sub { $sigcount++; die "got timeout\n"; };
119 local $SIG{PIPE} = sub { $sigcount++; die "broken pipe\n" };
120 local $SIG{__DIE__}; # see SA bug 4631
121
122 $prev_alarm = alarm($timeout);
123
124 $res = &$code(@param);
125
126 alarm(0); # avoid race conditions
127 };
128
129 my $err = $@;
130
131 alarm($prev_alarm) if defined($prev_alarm);
132
133 die "unknown error" if $sigcount && !$err; # seems to happen sometimes
134
135 die $err if $err;
136
137 return $res;
138 }
139
140 sub fork_tunnel {
141 my ($nodeip, $lport, $rport) = @_;
142
143 my $cmd = [@ssh_cmd, '-o', 'BatchMode=yes',
144 '-L', "$lport:localhost:$rport", $nodeip,
145 'qm', 'mtunnel' ];
146
147 my $tunnel = fork_command_pipe($cmd);
148
149 my $reader = $tunnel->{reader};
150
151 my $helo;
152 eval {
153 run_with_timeout(60, sub { $helo = <$reader>; });
154 die "no reply\n" if !$helo;
155 die "no quorum on target node\n" if $helo =~ m/^no quorum$/;
156 die "got strange reply from mtunnel ('$helo')\n"
157 if $helo !~ m/^tunnel online$/;
158 };
159 my $err = $@;
160
161 if ($err) {
162 finish_command_pipe($tunnel);
163 die "can't open migration tunnel - $err";
164 }
165 return $tunnel;
166 }
167
168 sub finish_tunnel {
169 my $tunnel = shift;
170
171 my $writer = $tunnel->{writer};
172
173 eval {
174 run_with_timeout(30, sub {
175 print $writer "quit\n";
176 $writer->flush();
177 });
178 };
179 my $err = $@;
180
181 finish_command_pipe($tunnel);
182
183 die $err if $err;
184 }
185
186 sub migrate {
187 my ($node, $nodeip, $vmid, $online, $force) = @_;
188
189 my $starttime = time();
190
191 my $rem_ssh = [@ssh_cmd, "root\@$nodeip"];
192
193 local $SIG{INT} = $SIG{TERM} = $SIG{QUIT} = $SIG{HUP} = $SIG{PIPE} = sub {
194 logmsg('err', "received interrupt - delayed");
195 $delayed_interrupt = 1;
196 };
197
198 local $ENV{RSYNC_RSH} = join(' ', @ssh_cmd);
199
200 my $session = {
201 vmid => $vmid,
202 node => $node,
203 nodeip => $nodeip,
204 force => $force,
205 storecfg => PVE::Storage::config(),
206 rem_ssh => $rem_ssh,
207 };
208
209 my $errors;
210
211 # lock config during migration
212 eval { PVE::QemuServer::lock_config($vmid, sub {
213
214 eval_int(sub { prepare($session); });
215 die $@ if $@;
216
217 my $conf = PVE::QemuServer::load_config($vmid);
218
219 PVE::QemuServer::check_lock($conf);
220
221 my $running = 0;
222 if (my $pid = PVE::QemuServer::check_running($vmid)) {
223 die "cant migrate running VM without --online\n" if !$online;
224 $running = $pid;
225 }
226
227 my $rhash = {};
228 eval_int (sub { phase1($session, $conf, $rhash, $running); });
229 my $err = $@;
230
231 if ($err) {
232 if ($rhash->{clearlock}) {
233 my $unset = { lock => 1 };
234 eval { PVE::QemuServer::change_config_nolock($session->{vmid}, {}, $unset, 1) };
235 logmsg('err', $@) if $@;
236 }
237 if ($rhash->{volumes}) {
238 foreach my $volid (@{$rhash->{volumes}}) {
239 logmsg('err', "found stale volume copy '$volid' on node '$session->{node}'");
240 }
241 }
242 die $err;
243 }
244
245 # vm is now owned by other node
246 my $volids = $rhash->{volumes};
247
248 if ($running) {
249
250 $rhash = {};
251 eval_int(sub { phase2($session, $conf, $rhash); });
252 my $err = $@;
253
254 # always kill tunnel
255 if ($rhash->{tunnel}) {
256 eval_int(sub { finish_tunnel($rhash->{tunnel}) });
257 if ($@) {
258 logmsg('err', "stopping tunnel failed - $@");
259 $errors = 1;
260 }
261 }
262
263 # fixme: ther is no config file, so this will never work
264 # fixme: use kill(9, $running) to make sure it is stopped
265 # always stop local VM - no interrupts possible
266 eval { PVE::QemuServer::vm_stop($session->{vmid}, 1); };
267 if ($@) {
268 logmsg('err', "stopping vm failed - $@");
269 $errors = 1;
270 }
271
272 if ($err) {
273 $errors = 1;
274 logmsg('err', "online migrate failure - $err");
275 }
276 }
277
278 # finalize -- clear migrate lock
279 eval_int(sub {
280 my $cmd = [ @{$session->{rem_ssh}}, $qm_cmd, 'unlock', $session->{vmid} ];
281 run_command($cmd);
282 });
283 if ($@) {
284 logmsg('err', "failed to clear migrate lock - $@");
285 $errors = 1;
286 }
287
288 # destroy local copies
289 foreach my $volid (@$volids) {
290 eval_int(sub { PVE::Storage::vdisk_free($session->{storecfg}, $volid); });
291 my $err = $@;
292
293 if ($err) {
294 logmsg('err', "removing local copy of '$volid' failed - $err");
295 $errors = 1;
296
297 last if $err =~ /^interrupted by signal$/;
298 }
299 }
300 })};
301
302 my $err = $@;
303
304 my $delay = time() - $starttime;
305 my $mins = int($delay/60);
306 my $secs = $delay - $mins*60;
307 my $hours = int($mins/60);
308 $mins = $mins - $hours*60;
309
310 my $duration = sprintf "%02d:%02d:%02d", $hours, $mins, $secs;
311
312 if ($err) {
313 my $msg = "migration aborted (duration $duration): $err\n";
314 logmsg('err', $msg);
315 die $msg;
316 }
317
318 if ($errors) {
319 my $msg = "migration finished with problems (duration $duration)\n";
320 logmsg('err', $msg);
321 die $msg;
322 }
323
324 logmsg('info', "migration finished successfuly (duration $duration)");
325 }
326
327 sub prepare {
328 my ($session) = @_;
329
330 my $conffile = PVE::QemuServer::config_file($session->{vmid});
331 die "VM $session->{vmid} does not exist on this node\n" if ! -f $conffile;
332
333 # test ssh connection
334 my $cmd = [ @{$session->{rem_ssh}}, '/bin/true' ];
335 eval { run_command($cmd); };
336 die "Can't connect to destination address using public key\n" if $@;
337 }
338
339 sub sync_disks {
340 my ($session, $conf, $rhash, $running) = @_;
341
342 logmsg('info', "copying disk images");
343
344 my $res = [];
345
346 eval {
347
348 my $volhash = {};
349 my $cdromhash = {};
350
351 # get list from PVE::Storage (for unused volumes)
352 my $dl = PVE::Storage::vdisk_list($session->{storecfg}, undef, $session->{vmid});
353 PVE::Storage::foreach_volid($dl, sub {
354 my ($volid, $sid, $volname) = @_;
355
356 my $scfg = PVE::Storage::storage_config($session->{storecfg}, $sid);
357
358 return if $scfg->{shared};
359
360 $volhash->{$volid} = 1;
361 });
362
363 # and add used,owned/non-shared disks (just to be sure we have all)
364
365 my $sharedvm = 1;
366 PVE::QemuServer::foreach_drive($conf, sub {
367 my ($ds, $drive) = @_;
368
369 my $volid = $drive->{file};
370 return if !$volid;
371
372 die "cant migrate local file/device '$volid'\n" if $volid =~ m|^/|;
373
374 if (PVE::QemuServer::drive_is_cdrom($drive)) {
375 die "cant migrate local cdrom drive\n" if $volid eq 'cdrom';
376 return if $volid eq 'none';
377 $cdromhash->{$volid} = 1;
378 }
379
380 my ($sid, $volname) = PVE::Storage::parse_volume_id($volid);
381
382 my $scfg = PVE::Storage::storage_config($session->{storecfg}, $sid);
383
384 return if $scfg->{shared};
385
386 die "can't migrate local cdrom '$volid'\n" if $cdromhash->{$volid};
387
388 $sharedvm = 0;
389
390 my ($path, $owner) = PVE::Storage::path($session->{storecfg}, $volid);
391
392 die "can't migrate volume '$volid' - owned by other VM (owner = VM $owner)\n"
393 if !$owner || ($owner != $session->{vmid});
394
395 $volhash->{$volid} = 1;
396 });
397
398 if ($running && !$sharedvm) {
399 die "can't do online migration - VM uses local disks\n";
400 }
401
402 # do some checks first
403 foreach my $volid (keys %$volhash) {
404 my ($sid, $volname) = PVE::Storage::parse_volume_id($volid);
405 my $scfg = PVE::Storage::storage_config($session->{storecfg}, $sid);
406
407 die "can't migrate '$volid' - storagy type '$scfg->{type}' not supported\n"
408 if $scfg->{type} ne 'dir';
409 }
410
411 foreach my $volid (keys %$volhash) {
412 my ($sid, $volname) = PVE::Storage::parse_volume_id($volid);
413 push @{$rhash->{volumes}}, $volid;
414 PVE::Storage::storage_migrate($session->{storecfg}, $volid, $session->{nodeip}, $sid);
415 }
416 };
417 die "Failed to sync data - $@" if $@;
418 }
419
420 sub phase1 {
421 my ($session, $conf, $rhash, $running) = @_;
422
423 logmsg('info', "starting migration of VM $session->{vmid} to node '$session->{node}' ($session->{nodeip})");
424
425 if (my $loc_res = PVE::QemuServer::check_local_resources($conf, 1)) {
426 if ($running || !$session->{force}) {
427 die "can't migrate VM which uses local devices\n";
428 } else {
429 logmsg('info', "migrating VM which uses local devices");
430 }
431 }
432
433 # set migrate lock in config file
434 $rhash->{clearlock} = 1;
435
436 PVE::QemuServer::change_config_nolock($session->{vmid}, { lock => 'migrate' }, {}, 1);
437
438 sync_disks($session, $conf, $rhash, $running);
439
440 # move config to remote node
441 my $conffile = PVE::QemuServer::config_file($session->{vmid});
442 my $newconffile = PVE::QemuServer::config_file($session->{vmid}, $session->{node});
443
444 die "Failed to move config to node '$session->{node}' - rename failed: $!\n"
445 if !rename($conffile, $newconffile);
446 };
447
448 sub phase2 {
449 my ($session, $conf, $rhash) = shift;
450
451 logmsg('info', "starting VM on remote node '$session->{node}'");
452
453 my $rport;
454
455 ## start on remote node
456 my $cmd = [@{$session->{rem_ssh}}, $qm_cmd, 'start',
457 $session->{vmid}, '--stateuri', 'tcp', '--skiplock'];
458
459 run_command($cmd, outfunc => sub {
460 my $line = shift;
461
462 if ($line =~ m/^migration listens on port (\d+)$/) {
463 $rport = $1;
464 }
465 });
466
467 die "unable to detect remote migration port\n" if !$rport;
468
469 logmsg('info', "starting migration tunnel");
470
471 ## create tunnel to remote port
472 my $lport = PVE::QemuServer::next_migrate_port();
473 $rhash->{tunnel} = fork_tunnel($session->{nodeip}, $lport, $rport);
474
475 logmsg('info', "starting online/live migration");
476 # start migration
477
478 my $start = time();
479
480 PVE::QemuServer::vm_monitor_command($session->{vmid}, "migrate -d \"tcp:localhost:$lport\"");
481
482 my $lstat = '';
483 while (1) {
484 sleep (2);
485 my $stat = PVE::QemuServer::vm_monitor_command($session->{vmid}, "info migrate", 1);
486 if ($stat =~ m/^Migration status: (active|completed|failed|cancelled)$/im) {
487 my $ms = $1;
488
489 if ($stat ne $lstat) {
490 if ($ms eq 'active') {
491 my ($trans, $rem, $total) = (0, 0, 0);
492 $trans = $1 if $stat =~ m/^transferred ram: (\d+) kbytes$/im;
493 $rem = $1 if $stat =~ m/^remaining ram: (\d+) kbytes$/im;
494 $total = $1 if $stat =~ m/^total ram: (\d+) kbytes$/im;
495
496 logmsg('info', "migration status: $ms (transferred ${trans}KB, " .
497 "remaining ${rem}KB), total ${total}KB)");
498 } else {
499 logmsg('info', "migration status: $ms");
500 }
501 }
502
503 if ($ms eq 'completed') {
504 my $delay = time() - $start;
505 if ($delay > 0) {
506 my $mbps = sprintf "%.2f", $conf->{memory}/$delay;
507 logmsg('info', "migration speed: $mbps MB/s");
508 }
509 }
510
511 if ($ms eq 'failed' || $ms eq 'cancelled') {
512 die "aborting\n"
513 }
514
515 last if $ms ne 'active';
516 } else {
517 die "unable to parse migration status '$stat' - aborting\n";
518 }
519 $lstat = $stat;
520 };
521 }