]>
Commit | Line | Data |
---|---|---|
1 | package PVE::QemuMigrate; | |
2 | ||
3 | use strict; | |
4 | use warnings; | |
5 | use PVE::AbstractMigrate; | |
6 | use IO::File; | |
7 | use IPC::Open2; | |
8 | use PVE::INotify; | |
9 | use PVE::Cluster; | |
10 | use PVE::Storage; | |
11 | use PVE::QemuServer; | |
12 | use Time::HiRes qw( usleep ); | |
13 | ||
14 | use base qw(PVE::AbstractMigrate); | |
15 | ||
# Start @$cmd as a child process with its stdin and stdout attached to pipes.
#
# Parameters:
#   $self - migration object; only used for logging in the stray-child case
#   $cmd  - array ref holding the command and its arguments
#
# Returns a hash ref with the keys
#   writer - handle connected to the child's stdin
#   reader - handle connected to the child's stdout
#   pid    - process id returned by open2()
#
# Dies with the error raised by open2() if the command cannot be started.
sub fork_command_pipe {
    my ($self, $cmd) = @_;

    # POSIX::_exit() is needed below; load the module here instead of
    # relying on some other module having pulled it in already.
    require POSIX;

    my $reader = IO::File->new();
    my $writer = IO::File->new();

    my $orig_pid = $$;

    my $cpid;

    eval { $cpid = open2($reader, $writer, @$cmd); };

    my $err = $@;

    # catch exec errors: a changed pid means we are running inside the
    # forked child (presumably because exec failed and the exception was
    # trapped by the eval above). That process must never continue with
    # the parent's code, so leave immediately without running END blocks.
    if ($orig_pid != $$) {
        $self->log('err', "can't fork command pipe\n");
        POSIX::_exit(1);
        kill('KILL', $$); # last resort, only reached if _exit() returned
    }

    die $err if $err;

    return { writer => $writer, reader => $reader, pid => $cpid };
}
41 | ||
# Close both ends of a command pipe created by fork_command_pipe() and make
# sure the child process goes away.
#
# Parameters:
#   $self    - migration object (used for logging)
#   $cmdpipe - hash ref with the keys 'writer', 'reader' and 'pid'
#   $timeout - optional number of seconds to wait for a voluntary exit
#              before signals are sent; when false, SIGTERM is sent at once
#
# Escalation: wait up to $timeout seconds, send SIGTERM, wait up to 10
# seconds, send SIGKILL. The child is reaped as soon as it has exited so
# that no zombie is left behind.
sub finish_command_pipe {
    my ($self, $cmdpipe, $timeout) = @_;

    my $writer = $cmdpipe->{writer};
    my $reader = $cmdpipe->{reader};

    $writer->close();
    $reader->close();

    my $cpid = $cmdpipe->{pid};

    # kill() treats an undefined or zero pid as "every process in my own
    # process group" - never signal without a real child pid.
    return if !$cpid;

    require POSIX;

    # True once the child is gone. Tries a non-blocking reap first (open2()
    # never waits for its child) and otherwise falls back to the /proc
    # based check, which also covers the case that the pid is not, or no
    # longer, a child of ours.
    my $child_gone = sub {
        local ($?, $!); # do not leak the child's status to our callers
        my $reaped = waitpid($cpid, POSIX::WNOHANG());
        return 1 if $reaped == $cpid;
        return !PVE::ProcFSTools::check_process_running($cpid);
    };

    if ($timeout) {
        for (my $i = 0; $i < $timeout; $i++) {
            return if $child_gone->();
            sleep(1);
        }
    }

    $self->log('info', "ssh tunnel still running - terminating now with SIGTERM\n");
    kill('TERM', $cpid);

    # wait again
    for (my $i = 0; $i < 10; $i++) {
        return if $child_gone->();
        sleep(1);
    }

    $self->log('info', "ssh tunnel still running - terminating now with SIGKILL\n");
    kill('KILL', $cpid);
    sleep 1;

    # collect the killed child if it has exited by now
    $child_gone->();

    return;
}
73 | ||
# Open an ssh tunnel that forwards local port $lport to port $rport on the
# remote side and runs 'qm mtunnel' there.
#
# Parameters:
#   $self   - migration object; $self->{rem_ssh} holds the ssh command prefix
#   $nodeip - not used by this sub (kept for interface compatibility)
#   $lport  - local port to listen on
#   $rport  - remote port to forward to
#
# Returns the command pipe hash ref produced by fork_command_pipe().
#
# Dies with "can't open migration tunnel - ..." if no greeting arrives
# within 60 seconds or the greeting is not "tunnel online"; the tunnel
# process is terminated before the error is raised.
sub fork_tunnel {
    my ($self, $nodeip, $lport, $rport) = @_;

    my $cmd = [@{$self->{rem_ssh}}, '-L', "$lport:localhost:$rport",
               'qm', 'mtunnel' ];

    my $tunnel = $self->fork_command_pipe($cmd);

    my $reader = $tunnel->{reader};

    my $helo;
    eval {
        PVE::Tools::run_with_timeout(60, sub { $helo = <$reader>; });
        die "no reply\n" if !$helo;

        # strip the line terminator so that it does not end up inside the
        # quoted reply of the error message below
        chomp $helo;

        die "no quorum on target node\n" if $helo =~ m/^no quorum$/;
        die "got strange reply from mtunnel ('$helo')\n"
            if $helo !~ m/^tunnel online$/;
    };
    my $err = $@;

    if ($err) {
        # no timeout given: the tunnel process is signalled right away
        $self->finish_command_pipe($tunnel);
        die "can't open migration tunnel - $err";
    }
    return $tunnel;
}
100 | ||
# Ask the remote end of the migration tunnel to quit and tear the tunnel
# process down.
#
# Parameters:
#   $self   - migration object
#   $tunnel - command pipe hash ref as returned by fork_tunnel()
#
# The pipe is always finished (with a 30 second grace period), even when
# sending the "quit" command failed; that failure is re-raised afterwards.
sub finish_tunnel {
    my ($self, $tunnel) = @_;

    my $to_remote = $tunnel->{writer};

    my $send_quit = sub {
        print $to_remote "quit\n";
        $to_remote->flush();
    };

    eval { PVE::Tools::run_with_timeout(30, $send_quit); };
    my $quit_err = $@;

    $self->finish_command_pipe($tunnel, 30);

    die $quit_err if $quit_err;
}
118 | ||
# Run $code (with @param) through PVE::QemuServer::lock_config() for the
# given VM and hand back whatever that call returns.
sub lock_vm {
    my $self = shift;
    my ($vmid, $code, @param) = @_;

    return PVE::QemuServer::lock_config($vmid, $code, @param);
}
124 | ||
# Check that the VM can be migrated and prepare the source side.
#
# Parameters:
#   $self - migration object; reads $self->{opts} and $self->{rem_ssh},
#           stores the storage configuration in $self->{storecfg} and the
#           VM configuration in $self->{vmconf}
#   $vmid - id of the VM to migrate
#
# Returns the pid reported by check_running() if the VM is running,
# otherwise 0.
#
# Dies if the VM is running and --online was not requested, if the VM uses
# local devices (unless it is stopped and 'force' is set), or if the ssh
# connection test to the target fails. Errors raised by the called PVE
# helpers propagate unchanged.
sub prepare {
    my ($self, $vmid) = @_;

    my $online = $self->{opts}->{online};

    $self->{storecfg} = PVE::Storage::config();

    # test if VM exists
    my $conf = $self->{vmconf} = PVE::QemuServer::load_config($vmid);

    PVE::QemuServer::check_lock($conf);

    my $running = 0;
    if (my $pid = PVE::QemuServer::check_running($vmid)) {
        die "cant migrate running VM without --online\n" if !$online;
        $running = $pid;
    }

    if (my $loc_res = PVE::QemuServer::check_local_resources($conf, 1)) {
        # Use the state detected just above. $self->{running} is not
        # assigned anywhere in this sub (presumably the caller stores our
        # return value there afterwards), so checking only that field would
        # let 'force' slip a running VM through.
        if ($running || $self->{running} || !$self->{opts}->{force}) {
            die "can't migrate VM which uses local devices\n";
        } else {
            $self->log('info', "migrating VM which uses local devices");
        }
    }

    # activate volumes
    my $vollist = PVE::QemuServer::get_vm_volumes($conf);
    PVE::Storage::activate_volumes($self->{storecfg}, $vollist);

    # fixme: check if storage is available on both nodes

    # test ssh connection
    my $cmd = [ @{$self->{rem_ssh}}, '/bin/true' ];
    eval { $self->cmd_quiet($cmd); };
    die "Can't connect to destination address using public key\n" if $@;

    return $running;
}
164 | ||
# Copy all non-shared disk images of the VM to the target node.
#
# Parameters:
#   $self - migration object; reads {vmconf}, {storecfg}, {node}, {nodeip},
#           {vmid} and {running}, and records every volume it starts to
#           copy in $self->{volumes}
#   $vmid - id of the VM being migrated
#
# Dies with "Failed to sync data - ..." if any check or copy fails.
sub sync_disks {
    my ($self, $vmid) = @_;

    $self->log('info', "copying disk images");

    my $conf = $self->{vmconf};

    $self->{volumes} = [];

    eval {

        my $volhash = {};   # volid => 1 for every volume that must be copied
        my $cdromhash = {}; # volid => 1 for every volume used as cdrom

        my $sharedvm = 1;   # cleared as soon as a non-shared volume shows up

        my @sids = PVE::Storage::storage_ids($self->{storecfg});
        foreach my $storeid (@sids) {
            my $scfg = PVE::Storage::storage_config($self->{storecfg}, $storeid);
            next if $scfg->{shared};
            next if !PVE::Storage::storage_check_enabled($self->{storecfg}, $storeid, undef, 1);

            # get list from PVE::Storage (for unused volumes)
            my $dl = PVE::Storage::vdisk_list($self->{storecfg}, $storeid, $vmid);
            PVE::Storage::foreach_volid($dl, sub {
                my ($volid, $sid) = @_;

                # check if storage is available on target node
                PVE::Storage::storage_check_node($self->{storecfg}, $sid, $self->{node});

                $volhash->{$volid} = 1;
                $sharedvm = 0; # there is a non-shared disk
            });
        }

        # and add used, owned/non-shared disks (just to be sure we have all)

        PVE::QemuServer::foreach_volid($conf, sub {
            my ($volid, $is_cdrom) = @_;

            return if !$volid;

            die "cant migrate local file/device '$volid'\n" if $volid =~ m|^/|;

            if ($is_cdrom) {
                die "cant migrate local cdrom drive\n" if $volid eq 'cdrom';
                return if $volid eq 'none';
                $cdromhash->{$volid} = 1;
            }

            my ($sid) = PVE::Storage::parse_volume_id($volid);

            # check if storage is available on both nodes
            my $scfg = PVE::Storage::storage_check_node($self->{storecfg}, $sid);
            PVE::Storage::storage_check_node($self->{storecfg}, $sid, $self->{node});

            return if $scfg->{shared};

            die "can't migrate local cdrom '$volid'\n" if $cdromhash->{$volid};

            $sharedvm = 0;

            my ($path, $owner) = PVE::Storage::path($self->{storecfg}, $volid);

            die "can't migrate volume '$volid' - owned by other VM (owner = VM $owner)\n"
                if !$owner || ($owner != $self->{vmid});

            $volhash->{$volid} = 1;
        });

        if ($self->{running} && !$sharedvm) {
            die "can't do online migration - VM uses local disks\n";
        }

        # do some checks first, so that nothing is copied if any volume
        # lives on a storage type that is not supported
        foreach my $volid (keys %$volhash) {
            my ($sid) = PVE::Storage::parse_volume_id($volid);
            my $scfg = PVE::Storage::storage_config($self->{storecfg}, $sid);

            die "can't migrate '$volid' - storage type '$scfg->{type}' not supported\n"
                if $scfg->{type} ne 'dir';
        }

        foreach my $volid (keys %$volhash) {
            my ($sid) = PVE::Storage::parse_volume_id($volid);
            # remember the volume before copying, so that cleanup code can
            # report a partial copy as well
            push @{$self->{volumes}}, $volid;
            PVE::Storage::storage_migrate($self->{storecfg}, $volid, $self->{nodeip}, $sid);
        }
    };
    die "Failed to sync data - $@" if $@;
}
258 | ||
# Migration phase 1: mark the VM configuration as locked for migration and
# copy the disk images to the target node.
#
# Parameters:
#   $self - migration object; reads {node}, {nodeip} and {vmconf}
#   $vmid - id of the VM being migrated
sub phase1 {
    my ($self, $vmid) = @_;

    $self->log('info', "starting migration of VM $vmid to node '$self->{node}' ($self->{nodeip})");

    # set migrate lock in config file
    my $vmconf = $self->{vmconf};
    $vmconf->{lock} = 'migrate';
    PVE::QemuServer::update_config_nolock($vmid, $vmconf, 1);

    sync_disks($self, $vmid);
}
273 | ||
# Undo phase 1 after a failure: remove the migrate lock from the VM
# configuration and report every volume recorded in $self->{volumes} as a
# stale copy on the target node (the copies themselves are not removed).
#
# Parameters:
#   $self - migration object
#   $vmid - id of the VM being migrated
#   $err  - error that caused the abort (not used by this sub)
sub phase1_cleanup {
    my ($self, $vmid, $err) = @_;

    $self->log('info', "aborting phase 1 - cleanup resources");

    my $vmconf = $self->{vmconf};
    delete $vmconf->{lock};

    eval { PVE::QemuServer::update_config_nolock($vmid, $vmconf, 1) };
    my $update_err = $@;
    $self->log('err', $update_err) if $update_err;

    foreach my $volid (@{ $self->{volumes} || [] }) {
        $self->log('err', "found stale volume copy '$volid' on node '$self->{node}'");
        # fixme: try to remove ?
    }
}
293 | ||
# Migration phase 2 (online migration): start the VM on the target node,
# open an ssh tunnel to its migration port, start the migration through the
# local monitor and poll 'query-migrate' until it has finished.
#
# Parameters:
#   $self - migration object; reads {vmconf}, {node}, {nodeip} and
#           {rem_ssh}, stores the tunnel in $self->{tunnel}
#   $vmid - id of the VM being migrated
#
# Dies if the remote migration port cannot be detected, if the tunnel
# cannot be opened, if the migration ends as 'failed' or 'cancelled', if
# the status cannot be parsed, or after more than five consecutive
# 'query-migrate' failures.
sub phase2 {
    my ($self, $vmid) = @_;

    my $conf = $self->{vmconf};

    $self->log('info', "starting VM $vmid on remote node '$self->{node}'");

    my $rport;

    my $nodename = PVE::INotify::nodename();

    ## start on remote node
    my $cmd = [@{$self->{rem_ssh}}, 'qm', 'start',
               $vmid, '--stateuri', 'tcp', '--skiplock', '--migratedfrom', $nodename];

    # the remote command announces the port it listens on; stderr output
    # is discarded
    PVE::Tools::run_command($cmd, outfunc => sub {
        my $line = shift;

        if ($line =~ m/^migration listens on port (\d+)$/) {
            $rport = $1;
        }
    }, errfunc => sub {});

    die "unable to detect remote migration port\n" if !$rport;

    $self->log('info', "starting migration tunnel");

    ## create tunnel to remote port
    my $lport = PVE::QemuServer::next_migrate_port();
    $self->{tunnel} = $self->fork_tunnel($self->{nodeip}, $lport, $rport);

    $self->log('info', "starting online/live migration on port $lport");
    # start migration

    my $start = time();

    # load_defaults
    my $defaults = PVE::QemuServer::load_defaults();

    # always set migrate speed (overwrite kvm default of 32m)
    # we set a very high default of 8192m which is basically unlimited
    my $migrate_speed = $defaults->{migrate_speed} || 8192;
    $migrate_speed = $conf->{migrate_speed} || $migrate_speed;
    $migrate_speed = $migrate_speed * 1048576;
    $self->log('info', "migrate_set_speed: $migrate_speed");
    eval {
        PVE::QemuServer::vm_mon_cmd_nocheck($vmid, "migrate_set_speed", value => int($migrate_speed));
    };
    $self->log('info', "migrate_set_speed error: $@") if $@;

    my $migrate_downtime = $defaults->{migrate_downtime};
    $migrate_downtime = $conf->{migrate_downtime} if defined($conf->{migrate_downtime});
    if (defined($migrate_downtime)) {
        $self->log('info', "migrate_set_downtime: $migrate_downtime");
        eval {
            PVE::QemuServer::vm_mon_cmd_nocheck($vmid, "migrate_set_downtime", value => int($migrate_downtime));
        };
        $self->log('info', "migrate_set_downtime error: $@") if $@;
    }

    # request the xbzrle capability to be switched off; failures of this
    # and of the cache size command below are deliberately ignored
    my $capabilities = {};
    $capabilities->{capability} = "xbzrle";
    $capabilities->{state} = JSON::false;

    eval {
        PVE::QemuServer::vm_mon_cmd_nocheck($vmid, "migrate-set-capabilities", capabilities => [$capabilities]);
    };

    # set cachesize to 10% of the total memory
    my $cachesize = int($conf->{memory}*1048576/10);
    eval {
        PVE::QemuServer::vm_mon_cmd_nocheck($vmid, "migrate-set-cache-size", value => $cachesize);
    };

    # an error of the migrate command itself is only reported if the
    # status query below does not deliver a usable status either
    eval {
        PVE::QemuServer::vm_mon_cmd_nocheck($vmid, "migrate", uri => "tcp:localhost:$lport");
    };
    my $merr = $@;

    my $lstat = 0;          # 'transferred' value seen by the previous poll
    my $usleep = 2000000;   # poll interval in microseconds
    my $i = 0;              # number of loop iterations so far
    my $err_count = 0;      # consecutive 'query-migrate' failures
    while (1) {
        $i++;

        # average amount transferred per iteration. This must not be
        # written as "my $x = ... if ...": the behaviour of a conditional
        # 'my' is undefined.
        my $avglstat = $lstat ? $lstat/$i : 0;

        usleep($usleep);
        my $stat;
        eval {
            $stat = PVE::QemuServer::vm_mon_cmd_nocheck($vmid, "query-migrate");
        };
        if (my $err = $@) {
            $err_count++;
            warn "query migrate failed: $err\n";
            if ($err_count <= 5) {
                usleep(1000000);
                next;
            }
            die "too many query migrate failures - aborting\n";
        }
        if ($stat->{status} =~ m/^(active|completed|failed|cancelled)$/im) {
            $merr = undef;
            $err_count = 0;
            if ($stat->{status} eq 'completed') {
                my $delay = time() - $start;
                if ($delay > 0) {
                    my $mbps = sprintf "%.2f", $conf->{memory}/$delay;
                    $self->log('info', "migration speed: $mbps MB/s");
                }
            }

            if ($stat->{status} eq 'failed' || $stat->{status} eq 'cancelled') {
                die "aborting\n"
            }

            if ($stat->{status} ne 'active') {
                $self->log('info', "migration status: $stat->{status}");
                last;
            }

            # byte counters: normalise missing values to 0 and compare
            # numerically
            my $trans = $stat->{ram}->{transferred} || 0;

            if ($trans != $lstat) {
                my $rem = $stat->{ram}->{remaining} || 0;
                my $total = $stat->{ram}->{total} || 0;

                # reduce sleep if the remaining memory is lower than the
                # average transfer
                $usleep = 300000 if $avglstat && $rem < $avglstat;

                $self->log('info', "migration status: $stat->{status} (transferred ${trans}, " .
                           "remaining ${rem}, total ${total})");
            }

            $lstat = $trans;

        } else {
            die $merr if $merr;
            die "unable to parse migration status '$stat->{status}' - aborting\n";
        }
    }
}
441 | ||
# Clean up after a failed phase 2. Does nothing unless $self->{errors} is
# set.
#
# Parameters:
#   $self - migration object
#   $vmid - id of the VM being migrated
#   $err  - error that caused the abort (not used by this sub)
#
# Sets $self->{phase2errors}, removes the migrate lock from the VM
# configuration, stops the VM that was started on the target node and
# closes the migration tunnel. Failures of the individual steps are logged
# instead of raised.
sub phase2_cleanup {
    my ($self, $vmid, $err) = @_;

    return if !$self->{errors};
    $self->{phase2errors} = 1;

    $self->log('info', "aborting phase 2 - cleanup resources");

    my $conf = $self->{vmconf};
    delete $conf->{lock};
    eval { PVE::QemuServer::update_config_nolock($vmid, $conf, 1) };
    if (my $err = $@) {
        $self->log('err', $err);
    }

    # cleanup resources on target host
    my $nodename = PVE::INotify::nodename();

    my $cmd = [@{$self->{rem_ssh}}, 'qm', 'stop', $vmid, '--skiplock', '--migratedfrom', $nodename];
    eval{ PVE::Tools::run_command($cmd, outfunc => sub {}, errfunc => sub {}) };
    if (my $err = $@) {
        $self->log('err', $err);
        $self->{errors} = 1;
    }

    # phase3_cleanup() returns early once phase2errors is set, so the
    # tunnel opened by phase2() has to be closed here; otherwise the ssh
    # process would be left running.
    if ($self->{tunnel}) {
        eval { finish_tunnel($self, $self->{tunnel}); };
        if (my $err = $@) {
            $self->log('err', $err);
            $self->{errors} = 1;
        }
    }
}
467 | ||
# Migration phase 3: free the local copies of all volumes recorded in
# $self->{volumes}. Skipped completely when phase 2 reported errors.
#
# A failure to free a volume is logged and flagged in $self->{errors}; the
# loop stops early only when the failure was an interruption by a signal.
sub phase3 {
    my ($self, $vmid) = @_;

    my $local_volumes = $self->{volumes};
    return if $self->{phase2errors};

    # destroy local copies
    foreach my $volid (@$local_volumes) {
        eval { PVE::Storage::vdisk_free($self->{storecfg}, $volid); };
        my $free_err = $@;
        next if !$free_err;

        $self->log('err', "removing local copy of '$volid' failed - $free_err");
        $self->{errors} = 1;
        last if $free_err =~ /^interrupted by signal$/;
    }
}
484 | ||
# Final steps of a migration: move the VM configuration to the target
# node, resume the VM there (when a tunnel exists), stop the local VM,
# close the tunnel, deactivate the volumes and clear the migrate lock on
# the target node. Skipped completely when phase 2 reported errors.
#
# Parameters:
#   $self - migration object
#   $vmid - id of the VM being migrated
#   $err  - not used by this sub
#
# Dies only if the configuration file cannot be renamed; every later
# failure is logged and flagged in $self->{errors}.
sub phase3_cleanup {
    my ($self, $vmid, $err) = @_;

    my $vmconf = $self->{vmconf};
    return if $self->{phase2errors};

    # log a failure and remember that the migration had errors
    my $note_error = sub {
        my ($msg) = @_;
        $self->log('err', $msg);
        $self->{errors} = 1;
    };

    # move config to remote node
    my $old_path = PVE::QemuServer::config_file($vmid);
    my $new_path = PVE::QemuServer::config_file($vmid, $self->{node});

    rename($old_path, $new_path)
        or die "Failed to move config to node '$self->{node}' - rename failed: $!\n";

    # now that the config file is moved, we can resume the vm on the
    # target if this is a live migration
    if ($self->{tunnel}) {
        my $resume_cmd = [@{$self->{rem_ssh}}, 'qm', 'resume', $vmid, '--skiplock'];
        eval { PVE::Tools::run_command($resume_cmd, outfunc => sub {}, errfunc => sub {}) };
        $note_error->($@) if $@;
    }

    # always stop local VM
    eval { PVE::QemuServer::vm_stop($self->{storecfg}, $vmid, 1, 1); };
    $note_error->("stopping vm failed - $@") if $@;

    if ($self->{tunnel}) {
        eval { finish_tunnel($self, $self->{tunnel}); };
        $note_error->($@) if $@;
    }

    # always deactivate volumes - avoid lvm LVs to be active on several nodes
    eval {
        my $vollist = PVE::QemuServer::get_vm_volumes($vmconf);
        PVE::Storage::deactivate_volumes($self->{storecfg}, $vollist);
    };
    $note_error->($@) if $@;

    # clear migrate lock
    my $unlock_cmd = [ @{$self->{rem_ssh}}, 'qm', 'unlock', $vmid ];
    $self->cmd_logerr($unlock_cmd, errmsg => "failed to clear migrate lock");
}
537 | ||
# Hook that runs at the very end of a migration. Nothing needs to be done
# for this migration type.
#
# Parameters:
#   $self - migration object (unused)
#   $vmid - id of the migrated VM (unused)
#
# Returns nothing (undef in scalar context, the empty list in list
# context) instead of the accidental result of the argument assignment.
sub final_cleanup {
    my ($self, $vmid) = @_;

    # nothing to do
    return;
}
543 | ||
544 | 1; |