]>
Commit | Line | Data |
---|---|---|
3ea94c60 | 1 | package PVE::QemuMigrate; |
1ef75254 | 2 | |
1e3baf05 | 3 | use strict; |
3ea94c60 | 4 | use warnings; |
16e903f2 | 5 | use PVE::AbstractMigrate; |
3ea94c60 | 6 | use IO::File; |
1e3baf05 | 7 | use IPC::Open2; |
3ea94c60 DM |
8 | use PVE::INotify; |
9 | use PVE::Cluster; | |
1e3baf05 | 10 | use PVE::Storage; |
3ea94c60 | 11 | use PVE::QemuServer; |
1e3baf05 | 12 | |
16e903f2 | 13 | use base qw(PVE::AbstractMigrate); |
1e3baf05 | 14 | |
# Spawn the command @$cmd via IPC::Open2 and hand back both pipe ends.
#
# Parameters:
#   $self - object providing log(); only used on the child error path
#   $cmd  - array reference holding the command and its arguments
#
# Returns a hash reference with the keys 'writer' and 'reader' (the
# IO::File handles given to open2) and 'pid' (open2's return value).
# Any error raised by open2 is re-thrown in the parent.
sub fork_command_pipe {
    my ($self, $cmd) = @_;

    my ($from_child, $to_child) = (IO::File->new(), IO::File->new());
    my $parent_pid = $$;

    my $child_pid = eval { open2($from_child, $to_child, @$cmd) };
    my $open_err = $@;

    # catch exec errors: a changed pid means we are still inside the
    # forked child, which must never return into the caller's code
    if ($$ != $parent_pid) {
        $self->log('err', "can't fork command pipe\n");
        POSIX::_exit(1);
        kill('KILL', $$);
    }

    die $open_err if $open_err;

    return { writer => $to_child, reader => $from_child, pid => $child_pid };
}
40 | ||
# Close both handles of a command pipe created by fork_command_pipe and
# make sure the child process goes away.
#
# Parameters:
#   $self    - object providing log()
#   $cmdpipe - hash reference with 'writer', 'reader' and 'pid' entries
#   $timeout - optional; when true, poll up to that many times (one
#              second apart) for the child to exit before signalling it
#
# Escalation order: SIGTERM, up to ten further one-second polls, then
# SIGKILL followed by a one-second pause.
sub finish_command_pipe {
    my ($self, $cmdpipe, $timeout) = @_;

    $cmdpipe->{writer}->close();
    $cmdpipe->{reader}->close();

    my $child = $cmdpipe->{pid};

    # poll once per second; true as soon as the child is no longer running
    my $gone_within = sub {
        my ($tries) = @_;
        for (my $n = 0; $n < $tries; $n++) {
            return 1 if !PVE::ProcFSTools::check_process_running($child);
            sleep(1);
        }
        return 0;
    };

    return if $timeout && $gone_within->($timeout);

    $self->log('info', "ssh tunnel still running - terminating now with SIGTERM\n");
    kill(15, $child);

    # wait again
    return if $gone_within->(10);

    $self->log('info', "ssh tunnel still running - terminating now with SIGKILL\n");
    kill 9, $child;
    sleep 1;
}
72 | ||
# Open the ssh migration tunnel: run 'qm mtunnel' through the remote ssh
# command with a local port forward and wait for its greeting line.
#
# Parameters:
#   $self   - migration object; reads {rem_ssh}
#   $nodeip - accepted for the caller's convenience, not used here
#   $lport  - local port for the ssh '-L' forward
#   $rport  - port on the remote side the forward points to
#
# Returns the command pipe hash from fork_command_pipe. Dies with
# "can't open migration tunnel - ..." (after tearing the pipe down) when
# no valid greeting arrives within 60 seconds.
sub fork_tunnel {
    my ($self, $nodeip, $lport, $rport) = @_;

    my @ssh_cmd = (
        @{$self->{rem_ssh}},
        '-L', "$lport:localhost:$rport",
        'qm', 'mtunnel',
    );

    my $tunnel = $self->fork_command_pipe(\@ssh_cmd);
    my $from_remote = $tunnel->{reader};

    my $greeting;
    eval {
        PVE::Tools::run_with_timeout(60, sub { $greeting = <$from_remote>; });
        die "no reply\n" unless $greeting;
        die "no quorum on target node\n" if $greeting =~ m/^no quorum$/;
        die "got strange reply from mtunnel ('$greeting')\n"
            unless $greeting =~ m/^tunnel online$/;
    };

    if (my $err = $@) {
        # do not leave a half-open ssh process behind
        $self->finish_command_pipe($tunnel);
        die "can't open migration tunnel - $err";
    }

    return $tunnel;
}
99 | ||
# Shut the migration tunnel down: send "quit" to the remote end, then
# close the command pipe.
#
# Parameters:
#   $self   - migration object
#   $tunnel - command pipe hash as returned by fork_tunnel
#
# The pipe is always finished (with a 30 second grace period); an error
# from sending "quit" is re-thrown afterwards.
sub finish_tunnel {
    my ($self, $tunnel) = @_;

    my $to_remote = $tunnel->{writer};

    my $send_quit = sub {
        print {$to_remote} "quit\n";
        $to_remote->flush();
    };

    eval { PVE::Tools::run_with_timeout(30, $send_quit); };
    my $quit_err = $@;

    $self->finish_command_pipe($tunnel, 30);

    die $quit_err if $quit_err;
}
117 | ||
16e903f2 DM |
# Run $code with @param via PVE::QemuServer::lock_config for VM $vmid.
#
# Parameters:
#   $self  - migration object (unused)
#   $vmid  - ID of the VM
#   $code  - code reference handed to lock_config
#   @param - extra arguments handed to lock_config
#
# Returns whatever PVE::QemuServer::lock_config returns.
sub lock_vm {
    my ($self, @lock_args) = @_;

    return PVE::QemuServer::lock_config(@lock_args);
}
ff1a2432 | 123 | |
16e903f2 DM |
# Check that VM $vmid can be migrated and prepare local state for it.
#
# Parameters:
#   $self - migration object; reads {opts}->{online}, {opts}->{force} and
#           {rem_ssh}; stores the storage configuration in {storecfg}
#           and the VM configuration in {vmconf}
#   $vmid - ID of the VM to migrate
#
# Returns the true value reported by PVE::QemuServer::check_running when
# the VM is running, otherwise 0.
#
# Dies when the VM is running without the 'online' option, when it uses
# local devices and is either running or 'force' is not set, or when the
# ssh test command fails. Errors from the configuration, lock and storage
# helpers called here propagate unchanged.
sub prepare {
    my ($self, $vmid) = @_;

    my $online = $self->{opts}->{online};

    $self->{storecfg} = PVE::Storage::config();

    # test if the VM exists
    my $conf = $self->{vmconf} = PVE::QemuServer::load_config($vmid);

    PVE::QemuServer::check_lock($conf);

    my $running = 0;
    if (my $pid = PVE::QemuServer::check_running($vmid)) {
        die "cant migrate running VM without --online\n" if !$online;
        $running = $pid;
    }

    if (PVE::QemuServer::check_local_resources($conf, 1)) {
        # Decide on the state detected just above. $self->{running} is
        # not assigned anywhere in this function, so relying on it alone
        # would let a running VM with local devices pass when 'force' is
        # set. It is still honoured in case a caller has set it already.
        if ($running || $self->{running} || !$self->{opts}->{force}) {
            die "can't migrate VM which uses local devices\n";
        } else {
            $self->log('info', "migrating VM which uses local devices");
        }
    }

    # activate volumes
    my $vollist = PVE::QemuServer::get_vm_volumes($conf);
    PVE::Storage::activate_volumes($self->{storecfg}, $vollist);

    # fixme: check if storage is available on both nodes

    # test ssh connection
    my $cmd = [ @{$self->{rem_ssh}}, '/bin/true' ];
    eval { $self->cmd_quiet($cmd); };
    die "Can't connect to destination address using public key\n" if $@;

    return $running;
}
163 | ||
# Collect every non-shared volume that belongs to the VM and copy it to
# the target node.
#
# Parameters:
#   $self - migration object; reads {vmconf}, {storecfg}, {node},
#           {nodeip}, {vmid} and {running}; {volumes} is reset and then
#           filled with each volume ID right before it is copied
#   $vmid - ID of the VM being migrated
#
# Dies with "Failed to sync data - ..." on any error, e.g. a local
# file/device or cdrom, a volume owned by another VM, an online migration
# with local disks, or a storage type other than 'dir'.
sub sync_disks {
    my ($self, $vmid) = @_;

    $self->log('info', "copying disk images");

    my $conf = $self->{vmconf};

    $self->{volumes} = [];

    eval {

        my %copy_set;       # volume ID => 1 for everything that must be copied
        my %cdrom_volids;   # volume IDs referenced by cdrom drives

        # Runs storage_check_node without a node argument and again for
        # the target node; hands back the result of the first call.
        # NOTE(review): presumably the first call checks the local node.
        my $storage_on_both_nodes = sub {
            my ($storeid) = @_;

            my $scfg = PVE::Storage::storage_check_node($self->{storecfg}, $storeid);
            PVE::Storage::storage_check_node($self->{storecfg}, $storeid, $self->{node});

            return $scfg;
        };

        # get list from PVE::Storage (for unused volumes)
        my $disk_list = PVE::Storage::vdisk_list($self->{storecfg}, undef, $vmid);
        PVE::Storage::foreach_volid($disk_list, sub {
            my ($volid, $storeid) = @_;

            # check if storage is available on both nodes
            my $scfg = $storage_on_both_nodes->($storeid);

            return if $scfg->{shared};

            $copy_set{$volid} = 1;
        });

        # and add used,owned/non-shared disks (just to be sure we have all)

        my $all_shared = 1;
        PVE::QemuServer::foreach_drive($conf, sub {
            my ($drive_name, $drive) = @_;

            my $volid = $drive->{file};
            return unless $volid;

            die "cant migrate local file/device '$volid'\n" if $volid =~ m|^/|;

            if (PVE::QemuServer::drive_is_cdrom($drive)) {
                die "cant migrate local cdrom drive\n" if $volid eq 'cdrom';
                return if $volid eq 'none';
                $cdrom_volids{$volid} = 1;
            }

            my ($storeid) = PVE::Storage::parse_volume_id($volid);

            # check if storage is available on both nodes
            my $scfg = $storage_on_both_nodes->($storeid);

            return if $scfg->{shared};

            die "can't migrate local cdrom '$volid'\n" if $cdrom_volids{$volid};

            $all_shared = 0;

            my (undef, $owner) = PVE::Storage::path($self->{storecfg}, $volid);

            if (!$owner || ($owner != $self->{vmid})) {
                die "can't migrate volume '$volid' - owned by other VM (owner = VM $owner)\n";
            }

            $copy_set{$volid} = 1;
        });

        if (!$all_shared && $self->{running}) {
            die "can't do online migration - VM uses local disks\n";
        }

        # do some checks first, so nothing is copied if any volume is unsupported
        for my $volid (keys %copy_set) {
            my ($storeid) = PVE::Storage::parse_volume_id($volid);
            my $scfg = PVE::Storage::storage_config($self->{storecfg}, $storeid);

            if ($scfg->{type} ne 'dir') {
                die "can't migrate '$volid' - storagy type '$scfg->{type}' not supported\n";
            }
        }

        for my $volid (keys %copy_set) {
            my ($storeid) = PVE::Storage::parse_volume_id($volid);
            # record the volume before copying so cleanup knows about it
            push @{$self->{volumes}}, $volid;
            PVE::Storage::storage_migrate($self->{storecfg}, $volid, $self->{nodeip}, $storeid);
        }
    };
    die "Failed to sync data - $@" if $@;
}
252 | ||
# Phase 1: lock the VM configuration, copy local disks and move the
# configuration file to the target node.
#
# Parameters:
#   $self - migration object; reads {vmconf}, {node} and {nodeip}
#   $vmid - ID of the VM being migrated
#
# Dies when the disk sync fails or the configuration file cannot be
# renamed to its location for the target node.
sub phase1 {
    my ($self, $vmid) = @_;

    $self->log('info', "starting migration of VM $vmid to node '$self->{node}' ($self->{nodeip})");

    # set migrate lock in config file
    my $vm_config = $self->{vmconf};
    $vm_config->{lock} = 'migrate';
    PVE::QemuServer::update_config_nolock($vmid, $vm_config, 1);

    sync_disks($self, $vmid);

    # move config to remote node
    my $local_path = PVE::QemuServer::config_file($vmid);
    my $target_path = PVE::QemuServer::config_file($vmid, $self->{node});

    die "Failed to move config to node '$self->{node}' - rename failed: $!\n"
        if !rename($local_path, $target_path);
}
273 | ||
16e903f2 DM |
# Undo phase 1 after a failure: drop the lock entry from the VM
# configuration and report volume copies left on the target node.
#
# Parameters:
#   $self - migration object; reads {vmconf}, {volumes} and {node}
#   $vmid - ID of the VM being migrated
#   $err  - error that aborted phase 1 (not used here)
#
# A failure while writing the configuration is logged, not thrown.
sub phase1_cleanup {
    my ($self, $vmid, $err) = @_;

    $self->log('info', "aborting phase 1 - cleanup resources");

    my $vm_config = $self->{vmconf};
    delete $vm_config->{lock};

    eval { PVE::QemuServer::update_config_nolock($vmid, $vm_config, 1) };
    my $unlock_err = $@;
    $self->log('err', $unlock_err) if $unlock_err;

    for my $stale_volid (@{ $self->{volumes} || [] }) {
        $self->log('err', "found stale volume copy '$stale_volid' on node '$self->{node}'");
        # fixme: try to remove ?
    }
}
293 | ||
# Phase 2 (live migration): start the VM on the target node, open the
# ssh tunnel and drive the migration through the local monitor.
#
# Steps:
#   1. run 'qm start <vmid> --stateuri tcp --skiplock' through the remote
#      ssh command and parse the migration port from its output
#   2. open the tunnel with fork_tunnel and keep it in $self->{tunnel}
#   3. issue the monitor 'migrate' command and poll 'query-migrate'
#      every two seconds until the status is no longer 'active'
#
# Parameters:
#   $self - migration object; reads {vmconf}, {rem_ssh}, {node}, {nodeip}
#   $vmid - ID of the VM being migrated
#
# Dies when the remote port cannot be detected, when the tunnel cannot
# be opened, when the migration reports 'failed' or 'cancelled', or when
# the reported status is not one of the known values.
sub phase2 {
    my ($self, $vmid) = @_;

    my $conf = $self->{vmconf};

    $self->log('info', "starting VM $vmid on remote node '$self->{node}'");

    my $rport;

    ## start on remote node
    my $cmd = [@{$self->{rem_ssh}}, 'qm', 'start',
               $vmid, '--stateuri', 'tcp', '--skiplock'];

    PVE::Tools::run_command($cmd, outfunc => sub {
        my $line = shift;

        if ($line =~ m/^migration listens on port (\d+)$/) {
            $rport = $1;
        }
    }, errfunc => sub {});

    die "unable to detect remote migration port\n" if !$rport;

    $self->log('info', "starting migration tunnel");

    ## create tunnel to remote port
    my $lport = PVE::QemuServer::next_migrate_port();
    $self->{tunnel} = $self->fork_tunnel($self->{nodeip}, $lport, $rport);

    $self->log('info', "starting online/live migration on port $lport");
    # start migration

    my $start = time();
    eval {
        PVE::QemuServer::vm_mon_cmd_nocheck($vmid, "migrate", uri => "tcp:localhost:$lport");
    };
    # keep the error for later: it only matters if the status query below
    # does not return a usable migration status
    my $merr = $@;

    my $lstat = 0;
    while (1) {
        sleep (2);
        my $stat = PVE::QemuServer::vm_mon_cmd_nocheck($vmid, "query-migrate");
        if ($stat->{status} =~ m/^(active|completed|failed|cancelled)$/im) {
            $merr = undef;

            if ($stat->{status} eq 'completed') {
                my $delay = time() - $start;
                if ($delay > 0) {
                    # treat a missing memory setting as 0 instead of
                    # dividing an undefined value
                    my $mbps = sprintf "%.2f", ($conf->{memory} || 0)/$delay;
                    $self->log('info', "migration speed: $mbps MB/s");
                }
            }

            if ($stat->{status} eq 'failed' || $stat->{status} eq 'cancelled') {
                die "aborting\n";
            }

            if ($stat->{status} ne 'active') {
                $self->log('info', "migration status: $stat->{status}");
                last;
            }

            # normalise missing counters to 0 so they can be compared
            # numerically without 'uninitialized' warnings
            my $trans = $stat->{ram}->{transferred} || 0;
            my $rem = $stat->{ram}->{remaining} || 0;
            my $total = $stat->{ram}->{total} || 0;

            # only log when the transferred amount changed since the last poll
            if ($trans != $lstat) {
                $self->log('info', "migration status: $stat->{status} (transferred ${trans}, " .
                           "remaining ${rem}, total ${total})");
            }

            $lstat = $trans;

        } else {
            die $merr if $merr;
            die "unable to parse migration status '$stat->{status}' - aborting\n";
        }
    }
}
16e903f2 DM |
373 | |
# Phase 3: remove the local copies of the volumes listed in
# $self->{volumes}.
#
# Parameters:
#   $self - migration object; reads {volumes} and {storecfg}
#   $vmid - ID of the VM being migrated (unused)
#
# A failed removal is logged and flagged in $self->{errors}; the loop
# continues with the next volume unless the error says the operation was
# interrupted by a signal.
sub phase3 {
    my ($self, $vmid) = @_;

    my $local_copies = $self->{volumes};

    # destroy local copies
    for my $volid (@$local_copies) {
        eval { PVE::Storage::vdisk_free($self->{storecfg}, $volid); };
        my $free_err = $@;
        next unless $free_err;

        $self->log('err', "removing local copy of '$volid' failed - $free_err");
        $self->{errors} = 1;
        last if $free_err =~ /^interrupted by signal$/;
    }
}
389 | ||
# Cleanup after phase 3: stop the local VM, close the migration tunnel,
# deactivate the VM's volumes and clear the lock on the target node.
#
# Parameters:
#   $self - migration object; reads {vmconf}, {storecfg}, {tunnel} and
#           {rem_ssh}
#   $vmid - ID of the VM being migrated
#   $err  - error from the preceding phase (not used here)
#
# Failures of the first three steps are logged and flagged in
# $self->{errors}; none of them stops the remaining steps.
sub phase3_cleanup {
    my ($self, $vmid, $err) = @_;

    my $conf = $self->{vmconf};

    # log a failed step and remember that the migration had errors
    my $note_failure = sub {
        my ($message) = @_;
        $self->log('err', $message);
        $self->{errors} = 1;
    };

    # always stop local VM
    eval { PVE::QemuServer::vm_stop($self->{storecfg}, $vmid, 1, 1); };
    $note_failure->("stopping vm failed - $@") if $@;

    if ($self->{tunnel}) {
        eval { finish_tunnel($self, $self->{tunnel}); };
        $note_failure->("$@") if $@;
    }

    # always deactivate volumes - avoid lvm LVs to be active on several nodes
    eval {
        my $vollist = PVE::QemuServer::get_vm_volumes($conf);
        PVE::Storage::deactivate_volumes($self->{storecfg}, $vollist);
    };
    $note_failure->("$@") if $@;

    # clear migrate lock
    my @unlock_cmd = (@{$self->{rem_ssh}}, 'qm', 'unlock', $vmid);
    $self->cmd_logerr(\@unlock_cmd, errmsg => "failed to clear migrate lock");
}
424 | ||
# Final cleanup step of the migration; this implementation has no work
# to do and only unpacks its arguments.
#
# Parameters:
#   $self - migration object
#   $vmid - ID of the VM that was migrated
sub final_cleanup {
    my ($self, $vmid) = @_;

    # nothing to do
}
430 | ||
431 | 1; |