]>
Commit | Line | Data |
---|---|---|
3ea94c60 | 1 | package PVE::QemuMigrate; |
1ef75254 | 2 | |
1e3baf05 | 3 | use strict; |
3ea94c60 | 4 | use warnings; |
16e903f2 | 5 | use PVE::AbstractMigrate; |
3ea94c60 | 6 | use IO::File; |
1e3baf05 | 7 | use IPC::Open2; |
3ea94c60 DM |
8 | use PVE::INotify; |
9 | use PVE::Cluster; | |
1e3baf05 | 10 | use PVE::Storage; |
3ea94c60 | 11 | use PVE::QemuServer; |
1e3baf05 | 12 | |
16e903f2 | 13 | use base qw(PVE::AbstractMigrate); |
1e3baf05 | 14 | |
# Start @$cmd as a child process connected through a bidirectional pipe.
#
# Parameters:
#   $self - migration object; only its log() method is used here
#   $cmd  - array reference holding the command and its arguments
#
# Returns a hash reference with the keys 'writer' (handle connected to the
# child's STDIN), 'reader' (handle connected to the child's STDOUT) and
# 'pid' (process id returned by open2).
#
# Dies with the error raised by open2() if the command could not be started.
sub fork_command_pipe {
    my ($self, $cmd) = @_;

    my $reader = IO::File->new();
    my $writer = IO::File->new();

    my $orig_pid = $$;

    my $cpid;

    eval { $cpid = open2($reader, $writer, @$cmd); };

    my $err = $@;

    # catch exec errors
    if ($orig_pid != $$) {
        # We are inside the forked child and exec failed. The child must
        # never return into the caller's code, so neither a failing log call
        # nor a missing POSIX module may prevent it from terminating.
        eval { $self->log('err', "can't fork command pipe\n"); };
        eval { require POSIX; POSIX::_exit(1); };
        # last resort if _exit could not be called
        kill('KILL', $$);
    }

    die $err if $err;

    return { writer => $writer, reader => $reader, pid => $cpid };
}
40 | ||
# Close a command pipe created by fork_command_pipe() and make sure the
# child process is gone and collected.
#
# Parameters:
#   $self    - migration object; only its log() method is used here
#   $cmdpipe - hash reference with the keys 'writer', 'reader' and 'pid'
#   $timeout - optional number of seconds to wait for a voluntary exit
#              before any signal is sent
#
# Sequence: close both handles, wait up to $timeout seconds, send SIGTERM,
# wait up to 10 more seconds, send SIGKILL. The child is collected with
# waitpid() so that no zombie process is left behind.
sub finish_command_pipe {
    my ($self, $cmdpipe, $timeout) = @_;

    my $writer = $cmdpipe->{writer};
    my $reader = $cmdpipe->{reader};

    $writer->close() if $writer;
    $reader->close() if $reader;

    my $cpid = $cmdpipe->{pid};

    # Without a pid there is nothing to wait for. Calling kill() with an
    # undefined/zero pid would signal our own process group instead.
    return if !$cpid;

    require POSIX;

    # True once the child has been reaped. A result of -1 means there is no
    # such child (any more), which also counts as "gone".
    my $child_collected = sub {
        my $res = waitpid($cpid, POSIX::WNOHANG());
        return ($res == $cpid || $res == -1) ? 1 : 0;
    };

    if ($timeout) {
        for (my $i = 0; $i < $timeout; $i++) {
            return if $child_collected->();
            sleep(1);
        }
    }

    $self->log('info', "ssh tunnel still running - terminating now with SIGTERM\n");
    kill(15, $cpid);

    # wait again
    for (my $i = 0; $i < 10; $i++) {
        return if $child_collected->();
        sleep(1);
    }

    $self->log('info', "ssh tunnel still running - terminating now with SIGKILL\n");
    kill 9, $cpid;
    sleep 1;

    # try to collect the killed child so that it does not stay a zombie
    $child_collected->();

    return;
}
72 | ||
# Open the ssh migration tunnel to the target node.
#
# Runs the command in $self->{rem_ssh} with a local port forward
# ($lport -> localhost:$rport on the remote side) and 'qm mtunnel' as the
# remote command, then waits up to 60 seconds for the greeting line.
#
# Parameters:
#   $self   - migration object (uses $self->{rem_ssh})
#   $nodeip - target node address (not used in this sub)
#   $lport  - local port to forward
#   $rport  - remote port to forward to
#
# Returns the command pipe hash from fork_command_pipe().
# Dies with "can't open migration tunnel - <reason>" after tearing the
# pipe down again if the greeting is missing or unexpected.
sub fork_tunnel {
    my ($self, $nodeip, $lport, $rport) = @_;

    my $cmd = [@{$self->{rem_ssh}}, '-L', "$lport:localhost:$rport",
               'qm', 'mtunnel' ];

    my $tunnel = $self->fork_command_pipe($cmd);

    my $reader = $tunnel->{reader};

    my $helo;
    eval {
        PVE::Tools::run_with_timeout(60, sub { $helo = <$reader>; });
        die "no reply\n" if !$helo;
        # strip the line ending so that it does not end up inside the quoted
        # reply of the error message below
        chomp $helo;
        die "no quorum on target node\n" if $helo =~ m/^no quorum$/;
        die "got strange reply from mtunnel ('$helo')\n"
            if $helo !~ m/^tunnel online$/;
    };
    my $err = $@;

    if ($err) {
        $self->finish_command_pipe($tunnel);
        die "can't open migration tunnel - $err";
    }
    return $tunnel;
}
99 | ||
# Shut down a migration tunnel created by fork_tunnel().
#
# Writes "quit" to the tunnel (limited to 30 seconds) and then always
# finishes the command pipe with a 30 second timeout. An error raised while
# writing is re-thrown only after the pipe has been finished.
sub finish_tunnel {
    my ($self, $tunnel) = @_;

    my $out = $tunnel->{writer};

    my $send_quit = sub {
        print $out "quit\n";
        $out->flush();
    };

    eval { PVE::Tools::run_with_timeout(30, $send_quit); };
    my $quit_error = $@;

    # tear the pipe down even when the quit command could not be delivered
    $self->finish_command_pipe($tunnel, 30);

    die $quit_error if $quit_error;
}
117 | ||
16e903f2 DM |
# Run $code with @param through PVE::QemuServer::lock_config() for VM $vmid
# and hand back whatever that call returns.
sub lock_vm {
    my $self = shift;
    my ($vmid, $code, @param) = @_;

    return PVE::QemuServer::lock_config($vmid, $code, @param);
}
ff1a2432 | 123 | |
16e903f2 DM |
# Check that VM $vmid can be migrated and prepare local state for it.
#
# Side effects: stores the storage configuration in $self->{storecfg} and
# the VM configuration in $self->{vmconf}, and activates the VM's volumes.
#
# Returns the pid reported by PVE::QemuServer::check_running() if the VM is
# running, 0 otherwise.
#
# Dies if
#   - the configuration cannot be loaded or is locked,
#   - the VM is running but the 'online' option is not set,
#   - the VM uses local resources and is running, or 'force' is not set,
#   - the test ssh command to the target fails.
sub prepare {
    my ($self, $vmid) = @_;

    my $online = $self->{opts}->{online};

    $self->{storecfg} = PVE::Storage::config();

    # test if the VM exists
    my $conf = $self->{vmconf} = PVE::QemuServer::load_config($vmid);

    PVE::QemuServer::check_lock($conf);

    my $running = 0;
    if (my $pid = PVE::QemuServer::check_running($vmid)) {
        die "cant migrate running VM without --online\n" if !$online;
        $running = $pid;
    }

    if (my $loc_res = PVE::QemuServer::check_local_resources($conf, 1)) {
        # $self->{running} is not assigned anywhere in this file, so the
        # state detected above has to be honoured here as well. Otherwise a
        # running VM with local devices passes this check when 'force' is set.
        if ($running || $self->{running} || !$self->{opts}->{force}) {
            die "can't migrate VM which uses local devices\n";
        } else {
            $self->log('info', "migrating VM which uses local devices");
        }
    }

    # activate volumes
    my $vollist = PVE::QemuServer::get_vm_volumes($conf);
    PVE::Storage::activate_volumes($self->{storecfg}, $vollist);

    # fixme: check if storage is available on both nodes

    # test ssh connection
    my $cmd = [ @{$self->{rem_ssh}}, '/bin/true' ];
    eval { $self->cmd_quiet($cmd); };
    die "Can't connect to destination address using public key\n" if $@;

    return $running;
}
163 | ||
# Copy all volumes of VM $vmid that live on non-shared storage to the
# target node.
#
# The set of volumes is built from two sources: the volume lists of every
# non-shared storage (this also finds unused volumes) and the drives found
# in the VM configuration. Every volume id is appended to $self->{volumes}
# right before its migration is started.
#
# Dies with "Failed to sync data - <reason>" if any check or copy fails.
sub sync_disks {
    my ($self, $vmid) = @_;

    $self->log('info', "copying disk images");

    my $conf = $self->{vmconf};

    $self->{volumes} = [];

    eval {

        my %to_copy;    # volume ids which have to be copied
        my %is_cdrom;   # volume ids used by cdrom drives

        for my $storeid (PVE::Storage::storage_ids($self->{storecfg})) {
            my $scfg = PVE::Storage::storage_config($self->{storecfg}, $storeid);
            next if $scfg->{shared};

            # get list from PVE::Storage (for unused volumes)
            my $dl = PVE::Storage::vdisk_list($self->{storecfg}, $storeid, $vmid);
            PVE::Storage::foreach_volid($dl, sub {
                my ($volid, $sid) = @_;

                # check if storage is available on both nodes
                PVE::Storage::storage_check_node($self->{storecfg}, $sid);
                PVE::Storage::storage_check_node($self->{storecfg}, $sid, $self->{node});

                $to_copy{$volid} = 1;
            });
        }

        # and add used,owned/non-shared disks (just to be sure we have all)

        my $all_shared = 1;
        PVE::QemuServer::foreach_drive($conf, sub {
            my ($ds, $drive) = @_;

            my $volid = $drive->{file};
            return if !$volid;

            if ($volid =~ m|^/|) {
                die "cant migrate local file/device '$volid'\n";
            }

            if (PVE::QemuServer::drive_is_cdrom($drive)) {
                die "cant migrate local cdrom drive\n" if $volid eq 'cdrom';
                return if $volid eq 'none';
                $is_cdrom{$volid} = 1;
            }

            my ($sid) = PVE::Storage::parse_volume_id($volid);

            # check if storage is available on both nodes
            my $scfg = PVE::Storage::storage_check_node($self->{storecfg}, $sid);
            PVE::Storage::storage_check_node($self->{storecfg}, $sid, $self->{node});

            # volumes on shared storage need no copy
            return if $scfg->{shared};

            if ($is_cdrom{$volid}) {
                die "can't migrate local cdrom '$volid'\n";
            }

            $all_shared = 0;

            my ($path, $owner) = PVE::Storage::path($self->{storecfg}, $volid);

            if (!$owner || ($owner != $self->{vmid})) {
                die "can't migrate volume '$volid' - owned by other VM (owner = VM $owner)\n";
            }

            $to_copy{$volid} = 1;
        });

        if (!$all_shared && $self->{running}) {
            die "can't do online migration - VM uses local disks\n";
        }

        # do some checks first
        for my $volid (keys %to_copy) {
            my ($sid) = PVE::Storage::parse_volume_id($volid);
            my $scfg = PVE::Storage::storage_config($self->{storecfg}, $sid);

            if ($scfg->{type} ne 'dir') {
                die "can't migrate '$volid' - storagy type '$scfg->{type}' not supported\n";
            }
        }

        # all checks passed - start copying
        for my $volid (keys %to_copy) {
            my ($sid) = PVE::Storage::parse_volume_id($volid);
            push @{$self->{volumes}}, $volid;
            PVE::Storage::storage_migrate($self->{storecfg}, $volid, $self->{nodeip}, $sid);
        }
    };
    if (my $sync_err = $@) {
        die "Failed to sync data - $sync_err";
    }
}
255 | ||
# Migration phase 1: lock the VM configuration, copy the local disks and
# move the configuration file to the target node.
#
# Dies if writing the configuration, syncing the disks or renaming the
# configuration file fails.
sub phase1 {
    my ($self, $vmid) = @_;

    $self->log('info', "starting migration of VM $vmid to node '$self->{node}' ($self->{nodeip})");

    # set migrate lock in config file
    my $vmconf = $self->{vmconf};
    $vmconf->{lock} = 'migrate';
    PVE::QemuServer::update_config_nolock($vmid, $vmconf, 1);

    sync_disks($self, $vmid);

    # move config to remote node
    my $local_cfg = PVE::QemuServer::config_file($vmid);
    my $remote_cfg = PVE::QemuServer::config_file($vmid, $self->{node});

    if (!rename($local_cfg, $remote_cfg)) {
        die "Failed to move config to node '$self->{node}' - rename failed: $!\n";
    }
}
276 | ||
16e903f2 DM |
# Undo phase 1 after a failure: remove the migrate lock from the VM
# configuration and report every volume that was already handed to the
# storage migration (these copies are only logged, not removed).
sub phase1_cleanup {
    my ($self, $vmid, $err) = @_;

    $self->log('info', "aborting phase 1 - cleanup resources");

    my $vmconf = $self->{vmconf};
    delete $vmconf->{lock};

    eval { PVE::QemuServer::update_config_nolock($vmid, $vmconf, 1) };
    my $unlock_err = $@;
    $self->log('err', $unlock_err) if $unlock_err;

    my $copied = $self->{volumes};
    if ($copied) {
        for my $volid (@$copied) {
            $self->log('err', "found stale volume copy '$volid' on node '$self->{node}'");
            # fixme: try to remove ?
        }
    }
}
296 | ||
# Migration phase 2: start the VM on the target node, open the migration
# tunnel and transfer the running VM through it.
#
# Steps:
#   1. run 'qm start <vmid> --stateuri tcp --skiplock' on the target and
#      read the migration port from its output
#   2. open the ssh tunnel and store it in $self->{tunnel}
#   3. send the 'migrate' monitor command and poll 'query-migrate' every
#      two seconds until the status is no longer 'active'
#
# Dies if the remote port cannot be detected, the tunnel cannot be opened,
# the migration fails or is cancelled, or the status cannot be parsed.
sub phase2 {
    my ($self, $vmid) = @_;

    my $conf = $self->{vmconf};

    $self->log('info', "starting VM $vmid on remote node '$self->{node}'");

    my $rport;

    ## start on remote node
    my $cmd = [@{$self->{rem_ssh}}, 'qm', 'start',
               $vmid, '--stateuri', 'tcp', '--skiplock'];

    PVE::Tools::run_command($cmd,
        outfunc => sub {
            my $line = shift;

            if ($line =~ m/^migration listens on port (\d+)$/) {
                $rport = $1;
            }
        },
        errfunc => sub {
            # do not discard what the remote side reports on stderr - it is
            # the only hint when the remote start does not work as expected
            my $line = shift;
            $self->log('info', $line);
        });

    die "unable to detect remote migration port\n" if !$rport;

    $self->log('info', "starting migration tunnel");

    ## create tunnel to remote port
    my $lport = PVE::QemuServer::next_migrate_port();
    $self->{tunnel} = $self->fork_tunnel($self->{nodeip}, $lport, $rport);

    $self->log('info', "starting online/live migration on port $lport");
    # start migration

    my $start = time();
    eval {
        PVE::QemuServer::vm_mon_cmd_nocheck($vmid, "migrate", uri => "tcp:localhost:$lport");
    };
    # only reported if the status query below does not return a usable
    # status; a valid status supersedes this error
    my $merr = $@;

    my $lstat = 0;
    while (1) {
        sleep (2);
        my $stat = PVE::QemuServer::vm_mon_cmd_nocheck($vmid, "query-migrate");

        my $raw_status = defined($stat->{status}) ? $stat->{status} : '';
        # compare case-insensitively everywhere, not only when validating
        my $status = lc($raw_status);

        if ($status !~ m/\A(?:active|completed|failed|cancelled)\z/) {
            die $merr if $merr;
            die "unable to parse migration status '$raw_status' - aborting\n";
        }

        $merr = undef;

        if ($status eq 'completed') {
            my $delay = time() - $start;
            if ($delay > 0) {
                # rough estimate: the 'memory' value of the VM configuration
                # divided by the elapsed time in seconds
                my $mbps = sprintf "%.2f", ($conf->{memory} || 0)/$delay;
                $self->log('info', "migration speed: $mbps MB/s");
            }
        }

        if ($status eq 'failed' || $status eq 'cancelled') {
            die "aborting\n"
        }

        if ($status ne 'active') {
            $self->log('info', "migration status: $raw_status");
            last;
        }

        my $ram = $stat->{ram} || {};
        my $trans = $ram->{transferred} || 0;

        # only log when progress was made since the last poll
        if ($trans ne $lstat) {
            my $rem = $ram->{remaining} || 0;
            my $total = $ram->{total} || 0;

            $self->log('info', "migration status: $raw_status (transferred ${trans}, " .
                       "remaining ${rem}, total ${total})");
        }

        $lstat = $trans;
    }
}
16e903f2 DM |
376 | |
# Migration phase 3: free the local copies of all volumes listed in
# $self->{volumes}.
#
# A failure is logged and flagged in $self->{errors} but does not abort the
# loop, unless the error says that the operation was interrupted by a signal.
sub phase3 {
    my ($self, $vmid) = @_;

    my $local_copies = $self->{volumes};

    # destroy local copies
    for my $volid (@$local_copies) {
        eval { PVE::Storage::vdisk_free($self->{storecfg}, $volid); };
        my $free_err = $@;
        next if !$free_err;

        $self->log('err', "removing local copy of '$volid' failed - $free_err");
        $self->{errors} = 1;
        last if $free_err =~ /^interrupted by signal$/;
    }
}
392 | ||
# Final cleanup after the migration: stop the local VM, close the migration
# tunnel (if one was opened), deactivate the VM's volumes and clear the
# migrate lock through 'qm unlock' on the target node.
#
# Failures of the first three steps are logged and flagged in
# $self->{errors}; none of them stops the following steps.
sub phase3_cleanup {
    my ($self, $vmid, $err) = @_;

    my $conf = $self->{vmconf};

    # log a problem and remember that the migration had errors
    my $record_failure = sub {
        my ($msg) = @_;
        $self->log('err', $msg);
        $self->{errors} = 1;
    };

    # always stop local VM
    eval { PVE::QemuServer::vm_stop($self->{storecfg}, $vmid, 1, 1); };
    if (my $stop_err = $@) {
        $record_failure->("stopping vm failed - $stop_err");
    }

    if ($self->{tunnel}) {
        eval { finish_tunnel($self, $self->{tunnel}); };
        if (my $tunnel_err = $@) {
            $record_failure->($tunnel_err);
        }
    }

    # always deactivate volumes - avoid lvm LVs to be active on several nodes
    eval {
        my $vollist = PVE::QemuServer::get_vm_volumes($conf);
        PVE::Storage::deactivate_volumes($self->{storecfg}, $vollist);
    };
    if (my $deactivate_err = $@) {
        $record_failure->($deactivate_err);
    }

    # clear migrate lock
    my $unlock_cmd = [ @{$self->{rem_ssh}}, 'qm', 'unlock', $vmid ];
    $self->cmd_logerr($unlock_cmd, errmsg => "failed to clear migrate lock");
}
427 | ||
# Last migration step. Intentionally empty: nothing has to be cleaned up
# here for this migration type.
sub final_cleanup {
    my ($self, $vmid) = @_;

    # nothing to do
}
433 | ||
434 | 1; |