]>
Commit | Line | Data |
---|---|---|
3ea94c60 | 1 | package PVE::QemuMigrate; |
1ef75254 | 2 | |
1e3baf05 | 3 | use strict; |
3ea94c60 | 4 | use warnings; |
16e903f2 | 5 | use PVE::AbstractMigrate; |
3ea94c60 | 6 | use IO::File; |
1e3baf05 | 7 | use IPC::Open2; |
3ea94c60 DM |
8 | use PVE::INotify; |
9 | use PVE::Cluster; | |
1e3baf05 | 10 | use PVE::Storage; |
3ea94c60 | 11 | use PVE::QemuServer; |
1e3baf05 | 12 | |
16e903f2 | 13 | use base qw(PVE::AbstractMigrate); |
1e3baf05 | 14 | |
# Spawn the command given as array reference $cmd through open2() and hand
# back both pipe endpoints.
#
# Returns a hash reference with the keys 'reader' (the handle passed to
# open2() as first argument), 'writer' (the handle passed as second argument)
# and 'pid' (the value returned by open2()). Dies with the open2() error if
# spawning failed.
sub fork_command_pipe {
    my ($self, $cmd) = @_;

    my $parent_pid = $$;

    my $from_child = IO::File->new();
    my $to_child = IO::File->new();

    my $child_pid = eval { open2($from_child, $to_child, @$cmd) };
    my $spawn_err = $@;

    # A changed pid means we are running inside a forked child in which
    # open2() threw (exec error). That process must never return into the
    # caller's code, so report and terminate right here.
    if ($$ != $parent_pid) {
        $self->log('err', "can't fork command pipe\n");
        POSIX::_exit(1);
        kill('KILL', $$);
    }

    die $spawn_err if $spawn_err;

    return { writer => $to_child, reader => $from_child, pid => $child_pid };
}
40 | ||
# Close both ends of a command pipe created by fork_command_pipe() and make
# sure the child goes away.
#
# If $timeout is true, the child is polled once per second for up to $timeout
# iterations; the sub returns as soon as the process is no longer reported as
# running. Otherwise (or after that grace period) SIGTERM is sent, followed by
# up to 10 more polls and finally SIGKILL.
sub finish_command_pipe {
    my ($self, $cmdpipe, $timeout) = @_;

    $cmdpipe->{writer}->close();
    $cmdpipe->{reader}->close();

    my $child_pid = $cmdpipe->{pid};

    # Poll once per second, at most $limit times. True as soon as the child
    # is no longer reported as running.
    my $gone_within = sub {
        my ($limit) = @_;
        my $polls = 0;
        while ($polls < $limit) {
            return 1 if !PVE::ProcFSTools::check_process_running($child_pid);
            sleep(1);
            $polls++;
        }
        return 0;
    };

    return if $timeout && $gone_within->($timeout);

    $self->log('info', "ssh tunnel still running - terminating now with SIGTERM\n");
    kill(15, $child_pid);

    # give the process a chance to react to SIGTERM
    return if $gone_within->(10);

    $self->log('info', "ssh tunnel still running - terminating now with SIGKILL\n");
    kill 9, $child_pid;
    sleep 1;
}
72 | ||
# Start 'qm mtunnel' on the remote side through the ssh command stored in
# $self->{rem_ssh}, forwarding local port $lport to localhost:$rport there.
#
# Waits up to 60 seconds for the greeting line and returns the command pipe
# (see fork_command_pipe) once the remote side answered 'tunnel online'.
# On any other outcome the pipe is torn down and the sub dies with
# "can't open migration tunnel - <reason>". $nodeip is accepted but not used
# here.
sub fork_tunnel {
    my ($self, $nodeip, $lport, $rport) = @_;

    my @tunnel_cmd = (
        @{$self->{rem_ssh}},
        '-L', "$lport:localhost:$rport",
        'qm', 'mtunnel',
    );

    my $tunnel = $self->fork_command_pipe(\@tunnel_cmd);

    my $from_remote = $tunnel->{reader};

    my $greeting;
    eval {
        PVE::Tools::run_with_timeout(60, sub { $greeting = <$from_remote>; });

        die "no reply\n" if !$greeting;
        die "no quorum on target node\n" if $greeting =~ m/^no quorum$/;

        if ($greeting !~ m/^tunnel online$/) {
            die "got strange reply from mtunnel ('$greeting')\n";
        }
    };
    if (my $err = $@) {
        # do not leave a half-open ssh process behind
        $self->finish_command_pipe($tunnel);
        die "can't open migration tunnel - $err";
    }

    return $tunnel;
}
99 | ||
# Ask the remote end of the migration tunnel to quit and tear the pipe down.
#
# The "quit" line is written with a 30 second timeout. The command pipe is
# always finished afterwards (with a 30 iteration grace period), and only
# then is an error from the write re-thrown.
sub finish_tunnel {
    my ($self, $tunnel) = @_;

    my $to_remote = $tunnel->{writer};

    my $send_quit = sub {
        print {$to_remote} "quit\n";
        $to_remote->flush();
    };

    eval { PVE::Tools::run_with_timeout(30, $send_quit); };
    my $quit_err = $@;

    # clean up the ssh process even if sending "quit" failed
    $self->finish_command_pipe($tunnel, 30);

    die $quit_err if $quit_err;
}
117 | ||
# Run $code (with @param) through PVE::QemuServer::lock_config() for VM $vmid
# and pass its result back to the caller unchanged.
sub lock_vm {
    my $self = shift;
    my ($vmid, $code, @param) = @_;

    return PVE::QemuServer::lock_config($vmid, $code, @param);
}
ff1a2432 | 123 | |
# Check that VM $vmid can be migrated and set up everything needed for it.
#
# Loads the storage configuration into $self->{storecfg} and the VM
# configuration into $self->{vmconf}, verifies the config lock, activates the
# VM's volumes and tests the ssh connection to the target.
#
# Returns the pid reported by PVE::QemuServer::check_running() if the VM is
# running, 0 otherwise.
#
# Dies if the VM is running and the 'online' option is not set, if the VM uses
# local resources and is running (or the 'force' option is not set), or if the
# ssh test command fails.
sub prepare {
    my ($self, $vmid) = @_;

    my $online = $self->{opts}->{online};

    $self->{storecfg} = PVE::Storage::config();

    # loading the config doubles as the "does this VM exist" test
    my $conf = $self->{vmconf} = PVE::QemuServer::load_config($vmid);

    PVE::QemuServer::check_lock($conf);

    my $running = 0;
    if (my $pid = PVE::QemuServer::check_running($vmid)) {
        die "cant migrate running VM without --online\n" if !$online;
        $running = $pid;
    }

    if (my $loc_res = PVE::QemuServer::check_local_resources($conf, 1)) {
        # The running state determined above lives in the local $running;
        # nothing in this sub stores it in $self->{running}. Checking only
        # the object field let --force bypass the refusal for a running VM.
        # $self->{running} is still honoured in case a caller set it earlier.
        if ($running || $self->{running} || !$self->{opts}->{force}) {
            die "can't migrate VM which uses local devices\n";
        } else {
            $self->log('info', "migrating VM which uses local devices");
        }
    }

    # activate volumes
    my $vollist = PVE::QemuServer::get_vm_volumes($conf);
    PVE::Storage::activate_volumes($self->{storecfg}, $vollist);

    # fixme: check if storage is available on both nodes

    # test ssh connection
    my $cmd = [ @{$self->{rem_ssh}}, '/bin/true' ];
    eval { $self->cmd_quiet($cmd); };
    die "Can't connect to destination address using public key\n" if $@;

    return $running;
}
163 | ||
# Copy all non-shared disk images of VM $vmid to the target node.
#
# The set of volumes is collected from two sources: everything
# PVE::Storage::vdisk_list() reports for this VM on enabled, non-shared
# storages (this also catches unused volumes), plus every non-shared drive
# referenced in the VM configuration. Each volume that is handed to
# PVE::Storage::storage_migrate() is recorded in $self->{volumes} first.
#
# Dies with "Failed to sync data - <reason>" on any error, e.g. a local
# file/device or cdrom, a volume owned by another VM, local disks on a running
# VM, or a storage type other than 'dir'.
sub sync_disks {
    my ($self, $vmid) = @_;

    $self->log('info', "copying disk images");

    my $conf = $self->{vmconf};
    my $storecfg = $self->{storecfg};
    my $target_node = $self->{node};

    $self->{volumes} = [];

    eval {
        my %to_copy;        # volid => 1 for every volume that must be copied
        my %is_cdrom;       # volid => 1 for every volume used as cdrom

        # get list from PVE::Storage (for unused volumes)
        for my $storeid (PVE::Storage::storage_ids($storecfg)) {
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            next if $scfg->{shared};
            next if !PVE::Storage::storage_check_enabled($storecfg, $storeid, undef, 1);

            my $disk_list = PVE::Storage::vdisk_list($storecfg, $storeid, $vmid);
            PVE::Storage::foreach_volid($disk_list, sub {
                my ($volid, $sid) = @_;

                # the storage must be available on the target node as well
                PVE::Storage::storage_check_node($storecfg, $sid, $target_node);

                $to_copy{$volid} = 1;
            });
        }

        # and add used, owned/non-shared disks (just to be sure we have all)
        my $all_shared = 1;
        PVE::QemuServer::foreach_drive($conf, sub {
            my ($ds, $drive) = @_;

            my $volid = $drive->{file};
            return if !$volid;

            die "cant migrate local file/device '$volid'\n" if $volid =~ m|^/|;

            if (PVE::QemuServer::drive_is_cdrom($drive)) {
                die "cant migrate local cdrom drive\n" if $volid eq 'cdrom';
                return if $volid eq 'none';
                $is_cdrom{$volid} = 1;
            }

            my ($sid) = PVE::Storage::parse_volume_id($volid);

            # check if storage is available on both nodes
            my $scfg = PVE::Storage::storage_check_node($storecfg, $sid);
            PVE::Storage::storage_check_node($storecfg, $sid, $target_node);

            return if $scfg->{shared};

            die "can't migrate local cdrom '$volid'\n" if $is_cdrom{$volid};

            $all_shared = 0;

            my (undef, $owner) = PVE::Storage::path($storecfg, $volid);

            unless ($owner && $owner == $self->{vmid}) {
                die "can't migrate volume '$volid' - owned by other VM (owner = VM $owner)\n";
            }

            $to_copy{$volid} = 1;
        });

        if (!$all_shared && $self->{running}) {
            die "can't do online migration - VM uses local disks\n";
        }

        # validate every volume before the first one is copied
        for my $volid (keys %to_copy) {
            my ($sid) = PVE::Storage::parse_volume_id($volid);
            my $scfg = PVE::Storage::storage_config($storecfg, $sid);

            if ($scfg->{type} ne 'dir') {
                die "can't migrate '$volid' - storagy type '$scfg->{type}' not supported\n";
            }
        }

        for my $volid (keys %to_copy) {
            my ($sid) = PVE::Storage::parse_volume_id($volid);
            # record the volume first, so cleanup code sees it even if the copy fails
            push @{$self->{volumes}}, $volid;
            PVE::Storage::storage_migrate($storecfg, $volid, $self->{nodeip}, $sid);
        }
    };
    die "Failed to sync data - $@" if $@;
}
256 | ||
# Migration phase 1: mark the VM configuration as locked for migration and
# copy the local disk images to the target node (see sync_disks).
sub phase1 {
    my ($self, $vmid) = @_;

    $self->log('info', "starting migration of VM $vmid to node '$self->{node}' ($self->{nodeip})");

    # set migrate lock in config file
    my $vm_config = $self->{vmconf};
    $vm_config->{lock} = 'migrate';
    PVE::QemuServer::update_config_nolock($vmid, $vm_config, 1);

    sync_disks($self, $vmid);
}
271 | ||
# Undo phase 1 after a failure: drop the migrate lock from the VM
# configuration and report every volume recorded in $self->{volumes} as a
# stale copy on the target node. Errors while writing the configuration are
# logged, not thrown. The $err argument is accepted but not used.
sub phase1_cleanup {
    my ($self, $vmid, $err) = @_;

    $self->log('info', "aborting phase 1 - cleanup resources");

    my $vm_config = $self->{vmconf};
    delete $vm_config->{lock};

    eval { PVE::QemuServer::update_config_nolock($vmid, $vm_config, 1) };
    my $update_err = $@;
    $self->log('err', $update_err) if $update_err;

    my $copied = $self->{volumes};
    return if !$copied;

    for my $volid (@$copied) {
        $self->log('err', "found stale volume copy '$volid' on node '$self->{node}'");
        # fixme: try to remove ?
    }
}
291 | ||
# Migration phase 2: live-migrate the running VM.
#
# Starts the VM on the target node via 'qm start ... --stateuri tcp', picks up
# the port the remote side reports ("migration listens on port N"), opens an
# ssh tunnel from a local port to it (stored in $self->{tunnel}) and issues
# the 'migrate' monitor command. Afterwards 'query-migrate' is polled every
# two seconds until the status is no longer 'active'.
#
# Dies if the remote port cannot be detected, if the migration reports
# 'failed' or 'cancelled' ("aborting"), or if the status cannot be parsed (in
# which case an error from the 'migrate' command itself takes precedence).
sub phase2 {
    my ($self, $vmid) = @_;

    my $conf = $self->{vmconf};

    $self->log('info', "starting VM $vmid on remote node '$self->{node}'");

    my $rport;

    my $nodename = PVE::INotify::nodename();

    ## start on remote node
    my $cmd = [@{$self->{rem_ssh}}, 'qm', 'start',
               $vmid, '--stateuri', 'tcp', '--skiplock', '--migratedfrom', $nodename];

    PVE::Tools::run_command($cmd, outfunc => sub {
        my $line = shift;

        if ($line =~ m/^migration listens on port (\d+)$/) {
            $rport = $1;
        }
    }, errfunc => sub {});

    die "unable to detect remote migration port\n" if !$rport;

    $self->log('info', "starting migration tunnel");

    ## create tunnel to remote port
    my $lport = PVE::QemuServer::next_migrate_port();
    $self->{tunnel} = $self->fork_tunnel($self->{nodeip}, $lport, $rport);

    $self->log('info', "starting online/live migration on port $lport");

    # start migration
    my $start = time();
    eval {
        PVE::QemuServer::vm_mon_cmd_nocheck($vmid, "migrate", uri => "tcp:localhost:$lport");
    };
    # remembered only; it is reported if the status query below is unusable
    my $merr = $@;

    my $lstat = 0;
    while (1) {
        sleep(2);

        my $stat = PVE::QemuServer::vm_mon_cmd_nocheck($vmid, "query-migrate");

        my $raw_status = defined($stat->{status}) ? $stat->{status} : '';
        # The status is accepted case-insensitively, so normalize it once and
        # use the normalized value for every comparison below. Comparing the
        # raw value would treat e.g. "Failed" as a finished migration.
        my $status = lc($raw_status);

        if ($status !~ m/^(?:active|completed|failed|cancelled)$/) {
            die $merr if $merr;
            die "unable to parse migration status '$raw_status' - aborting\n";
        }

        # a usable status means the 'migrate' command did get through
        $merr = undef;

        if ($status eq 'completed') {
            my $delay = time() - $start;
            if ($delay > 0) {
                my $mbps = sprintf "%.2f", $conf->{memory}/$delay;
                $self->log('info', "migration speed: $mbps MB/s");
            }
        }

        if ($status eq 'failed' || $status eq 'cancelled') {
            die "aborting\n";
        }

        if ($status ne 'active') {
            $self->log('info', "migration status: $status");
            last;
        }

        my $ram = $stat->{ram} || {};
        my $trans = $ram->{transferred} || 0;

        # only report progress when the transferred amount changed
        if ($trans != $lstat) {
            my $rem = $ram->{remaining} || 0;
            my $total = $ram->{total} || 0;

            $self->log('info', "migration status: $status (transferred ${trans}, " .
                       "remaining ${rem}, total ${total})");
        }

        $lstat = $trans;
    }
}
16e903f2 | 373 | |
# Undo phase 2 after a failure: remove the migrate lock from the VM
# configuration and write it back. A failing write is logged, not thrown.
# The $err argument is accepted but not used.
sub phase2_cleanup {
    my ($self, $vmid, $err) = @_;

    $self->log('info', "aborting phase 2 - cleanup resources");

    my $vm_config = $self->{vmconf};
    delete $vm_config->{lock};

    eval { PVE::QemuServer::update_config_nolock($vmid, $vm_config, 1) };
    my $update_err = $@;
    $self->log('err', $update_err) if $update_err;

    ## fixme : vm_stop_cleanup on target vm
}
390 | ||
# Migration phase 3: remove the local copies of all volumes recorded in
# $self->{volumes}.
#
# A failing removal is logged and flagged in $self->{errors}, and the loop
# continues with the next volume - unless the error is "interrupted by
# signal", which stops the loop.
sub phase3 {
    my ($self, $vmid) = @_;

    my $migrated = $self->{volumes};

    # destroy local copies
    for my $volid (@$migrated) {
        eval { PVE::Storage::vdisk_free($self->{storecfg}, $volid); };
        my $free_err = $@;
        next if !$free_err;

        $self->log('err', "removing local copy of '$volid' failed - $free_err");
        $self->{errors} = 1;
        last if $free_err =~ /^interrupted by signal$/;
    }
}
406 | ||
# Final steps of a migration: hand the VM over to the target node.
#
# Moves the configuration file to the target node (dies if the rename fails),
# resumes the VM on the target when a migration tunnel exists, stops the local
# VM, closes the tunnel, deactivates the VM's volumes and finally clears the
# migrate lock through the remote 'qm unlock'. Failures of the steps after the
# rename are logged and flagged in $self->{errors} instead of being thrown.
# The $err argument is accepted but not used.
sub phase3_cleanup {
    my ($self, $vmid, $err) = @_;

    my $conf = $self->{vmconf};

    # every non-fatal problem below is reported the same way
    my $record_failure = sub {
        my ($msg) = @_;
        $self->log('err', $msg);
        $self->{errors} = 1;
    };

    # move config to remote node
    my $local_conf = PVE::QemuServer::config_file($vmid);
    my $target_conf = PVE::QemuServer::config_file($vmid, $self->{node});

    if (!rename($local_conf, $target_conf)) {
        die "Failed to move config to node '$self->{node}' - rename failed: $!\n";
    }

    ## now that the config file is moved, resume the VM on the target (live migration only)
    if ($self->{tunnel}) {
        my $resume_cmd = [@{$self->{rem_ssh}}, 'qm', 'resume', $vmid, '--skiplock'];
        eval { PVE::Tools::run_command($resume_cmd, outfunc => sub {}, errfunc => sub {}) };
        if (my $resume_err = $@) {
            $record_failure->($resume_err);
        }
    }

    # always stop local VM
    eval { PVE::QemuServer::vm_stop($self->{storecfg}, $vmid, 1, 1); };
    if (my $stop_err = $@) {
        $record_failure->("stopping vm failed - $stop_err");
    }

    if ($self->{tunnel}) {
        eval { finish_tunnel($self, $self->{tunnel}); };
        if (my $tunnel_err = $@) {
            $record_failure->($tunnel_err);
        }
    }

    # always deactivate volumes - avoid lvm LVs to be active on several nodes
    eval {
        my $vollist = PVE::QemuServer::get_vm_volumes($conf);
        PVE::Storage::deactivate_volumes($self->{storecfg}, $vollist);
    };
    if (my $deactivate_err = $@) {
        $record_failure->($deactivate_err);
    }

    # clear migrate lock
    my $unlock_cmd = [ @{$self->{rem_ssh}}, 'qm', 'unlock', $vmid ];
    $self->cmd_logerr($unlock_cmd, errmsg => "failed to clear migrate lock");
}
460 | ||
# Last hook of the migration sequence. QEMU migration has no work left at
# this point, so the arguments are only unpacked.
sub final_cleanup {
    my ($self, $vmid) = @_;
}
466 | ||
467 | 1; |