# PVE::QemuMigrate - migrate QEMU/KVM virtual machines between cluster nodes.
package PVE::QemuMigrate;

use strict;
use warnings;

use IO::File;
use IPC::Open2;
use POSIX ();

use PVE::AbstractMigrate;
use PVE::Cluster;
use PVE::INotify;
use PVE::QemuServer;
use PVE::Storage;

use base qw(PVE::AbstractMigrate);

1ef75254 | 15 | sub fork_command_pipe { |
46a84fd4 | 16 | my ($self, $cmd) = @_; |
19672434 | 17 | |
1ef75254 DM |
18 | my $reader = IO::File->new(); |
19 | my $writer = IO::File->new(); | |
20 | ||
21 | my $orig_pid = $$; | |
22 | ||
23 | my $cpid; | |
24 | ||
25 | eval { $cpid = open2($reader, $writer, @$cmd); }; | |
26 | ||
27 | my $err = $@; | |
28 | ||
29 | # catch exec errors | |
30 | if ($orig_pid != $$) { | |
46a84fd4 | 31 | $self->log('err', "can't fork command pipe\n"); |
19672434 DM |
32 | POSIX::_exit(1); |
33 | kill('KILL', $$); | |
1ef75254 DM |
34 | } |
35 | ||
36 | die $err if $err; | |
37 | ||
38 | return { writer => $writer, reader => $reader, pid => $cpid }; | |
39 | } | |
40 | ||
19672434 | 41 | sub finish_command_pipe { |
46a84fd4 | 42 | my ($self, $cmdpipe) = @_; |
1ef75254 DM |
43 | |
44 | my $writer = $cmdpipe->{writer}; | |
45 | my $reader = $cmdpipe->{reader}; | |
46 | ||
47 | $writer->close(); | |
48 | $reader->close(); | |
49 | ||
50 | my $cpid = $cmdpipe->{pid}; | |
51 | ||
52 | kill(15, $cpid) if kill(0, $cpid); | |
53 | ||
54 | waitpid($cpid, 0); | |
55 | } | |
56 | ||
1e3baf05 | 57 | sub fork_tunnel { |
16e903f2 | 58 | my ($self, $nodeip, $lport, $rport) = @_; |
1e3baf05 | 59 | |
16e903f2 | 60 | my $cmd = [@{$self->{rem_ssh}}, '-L', "$lport:localhost:$rport", |
1e3baf05 | 61 | 'qm', 'mtunnel' ]; |
19672434 | 62 | |
46a84fd4 | 63 | my $tunnel = $self->fork_command_pipe($cmd); |
1e3baf05 DM |
64 | |
65 | my $reader = $tunnel->{reader}; | |
66 | ||
67 | my $helo; | |
19672434 | 68 | eval { |
17eed025 | 69 | PVE::Tools::run_with_timeout(60, sub { $helo = <$reader>; }); |
1e3baf05 | 70 | die "no reply\n" if !$helo; |
1ef75254 | 71 | die "no quorum on target node\n" if $helo =~ m/^no quorum$/; |
19672434 | 72 | die "got strange reply from mtunnel ('$helo')\n" |
1e3baf05 DM |
73 | if $helo !~ m/^tunnel online$/; |
74 | }; | |
75 | my $err = $@; | |
76 | ||
77 | if ($err) { | |
46a84fd4 | 78 | $self->finish_command_pipe($tunnel); |
1e3baf05 DM |
79 | die "can't open migration tunnel - $err"; |
80 | } | |
81 | return $tunnel; | |
82 | } | |
83 | ||
19672434 | 84 | sub finish_tunnel { |
16e903f2 | 85 | my ($self, $tunnel) = @_; |
1e3baf05 DM |
86 | |
87 | my $writer = $tunnel->{writer}; | |
88 | ||
19672434 | 89 | eval { |
17eed025 | 90 | PVE::Tools::run_with_timeout(30, sub { |
1e3baf05 DM |
91 | print $writer "quit\n"; |
92 | $writer->flush(); | |
19672434 | 93 | }); |
1e3baf05 DM |
94 | }; |
95 | my $err = $@; | |
19672434 | 96 | |
46a84fd4 | 97 | $self->finish_command_pipe($tunnel); |
19672434 | 98 | |
1e3baf05 DM |
99 | die $err if $err; |
100 | } | |
101 | ||
16e903f2 DM |
102 | sub lock_vm { |
103 | my ($self, $vmid, $code, @param) = @_; | |
3ea94c60 | 104 | |
16e903f2 DM |
105 | return PVE::QemuServer::lock_config($vmid, $code, @param); |
106 | } | |
ff1a2432 | 107 | |
16e903f2 DM |
108 | sub prepare { |
109 | my ($self, $vmid) = @_; | |
ff1a2432 | 110 | |
16e903f2 | 111 | my $online = $self->{opts}->{online}; |
3ea94c60 | 112 | |
16e903f2 | 113 | $self->{storecfg} = PVE::Storage::config(); |
3ea94c60 | 114 | |
16e903f2 DM |
115 | # test is VM exist |
116 | my $conf = $self->{vmconf} = PVE::QemuServer::load_config($vmid); | |
3ea94c60 | 117 | |
16e903f2 | 118 | PVE::QemuServer::check_lock($conf); |
3ea94c60 | 119 | |
16e903f2 DM |
120 | my $running = 0; |
121 | if (my $pid = PVE::QemuServer::check_running($vmid)) { | |
122 | die "cant migrate running VM without --online\n" if !$online; | |
123 | $running = $pid; | |
3ea94c60 DM |
124 | } |
125 | ||
16e903f2 DM |
126 | if (my $loc_res = PVE::QemuServer::check_local_resources($conf, 1)) { |
127 | if ($self->{running} || !$self->{opts}->{force}) { | |
128 | die "can't migrate VM which uses local devices\n"; | |
129 | } else { | |
130 | $self->log('info', "migrating VM which uses local devices"); | |
131 | } | |
3ea94c60 DM |
132 | } |
133 | ||
ff1a2432 DM |
134 | # activate volumes |
135 | my $vollist = PVE::QemuServer::get_vm_volumes($conf); | |
16e903f2 DM |
136 | PVE::Storage::activate_volumes($self->{storecfg}, $vollist); |
137 | ||
138 | # fixme: check if storage is available on both nodes | |
3ea94c60 DM |
139 | |
140 | # test ssh connection | |
16e903f2 DM |
141 | my $cmd = [ @{$self->{rem_ssh}}, '/bin/true' ]; |
142 | eval { $self->cmd_quiet($cmd); }; | |
3ea94c60 | 143 | die "Can't connect to destination address using public key\n" if $@; |
ff1a2432 | 144 | |
16e903f2 | 145 | return $running; |
3ea94c60 DM |
146 | } |
147 | ||
148 | sub sync_disks { | |
16e903f2 DM |
149 | my ($self, $vmid) = @_; |
150 | ||
151 | $self->log('info', "copying disk images"); | |
3ea94c60 | 152 | |
16e903f2 DM |
153 | my $conf = $self->{vmconf}; |
154 | ||
155 | $self->{volumes} = []; | |
3ea94c60 DM |
156 | |
157 | my $res = []; | |
158 | ||
159 | eval { | |
160 | ||
161 | my $volhash = {}; | |
162 | my $cdromhash = {}; | |
163 | ||
164 | # get list from PVE::Storage (for unused volumes) | |
16e903f2 | 165 | my $dl = PVE::Storage::vdisk_list($self->{storecfg}, undef, $vmid); |
3ea94c60 DM |
166 | PVE::Storage::foreach_volid($dl, sub { |
167 | my ($volid, $sid, $volname) = @_; | |
168 | ||
16e903f2 DM |
169 | # check if storage is available on both nodes |
170 | my $scfg = PVE::Storage::storage_check_node($self->{storecfg}, $sid); | |
171 | PVE::Storage::storage_check_node($self->{storecfg}, $sid, $self->{node}); | |
3ea94c60 DM |
172 | |
173 | return if $scfg->{shared}; | |
174 | ||
175 | $volhash->{$volid} = 1; | |
176 | }); | |
177 | ||
178 | # and add used,owned/non-shared disks (just to be sure we have all) | |
179 | ||
180 | my $sharedvm = 1; | |
181 | PVE::QemuServer::foreach_drive($conf, sub { | |
182 | my ($ds, $drive) = @_; | |
183 | ||
184 | my $volid = $drive->{file}; | |
185 | return if !$volid; | |
186 | ||
187 | die "cant migrate local file/device '$volid'\n" if $volid =~ m|^/|; | |
188 | ||
189 | if (PVE::QemuServer::drive_is_cdrom($drive)) { | |
190 | die "cant migrate local cdrom drive\n" if $volid eq 'cdrom'; | |
191 | return if $volid eq 'none'; | |
192 | $cdromhash->{$volid} = 1; | |
193 | } | |
194 | ||
195 | my ($sid, $volname) = PVE::Storage::parse_volume_id($volid); | |
196 | ||
16e903f2 DM |
197 | # check if storage is available on both nodes |
198 | my $scfg = PVE::Storage::storage_check_node($self->{storecfg}, $sid); | |
199 | PVE::Storage::storage_check_node($self->{storecfg}, $sid, $self->{node}); | |
3ea94c60 DM |
200 | |
201 | return if $scfg->{shared}; | |
202 | ||
203 | die "can't migrate local cdrom '$volid'\n" if $cdromhash->{$volid}; | |
204 | ||
205 | $sharedvm = 0; | |
206 | ||
16e903f2 | 207 | my ($path, $owner) = PVE::Storage::path($self->{storecfg}, $volid); |
3ea94c60 DM |
208 | |
209 | die "can't migrate volume '$volid' - owned by other VM (owner = VM $owner)\n" | |
16e903f2 | 210 | if !$owner || ($owner != $self->{vmid}); |
3ea94c60 DM |
211 | |
212 | $volhash->{$volid} = 1; | |
213 | }); | |
214 | ||
16e903f2 | 215 | if ($self->{running} && !$sharedvm) { |
3ea94c60 DM |
216 | die "can't do online migration - VM uses local disks\n"; |
217 | } | |
218 | ||
219 | # do some checks first | |
220 | foreach my $volid (keys %$volhash) { | |
221 | my ($sid, $volname) = PVE::Storage::parse_volume_id($volid); | |
16e903f2 | 222 | my $scfg = PVE::Storage::storage_config($self->{storecfg}, $sid); |
3ea94c60 DM |
223 | |
224 | die "can't migrate '$volid' - storagy type '$scfg->{type}' not supported\n" | |
225 | if $scfg->{type} ne 'dir'; | |
226 | } | |
227 | ||
228 | foreach my $volid (keys %$volhash) { | |
229 | my ($sid, $volname) = PVE::Storage::parse_volume_id($volid); | |
16e903f2 DM |
230 | push @{$self->{volumes}}, $volid; |
231 | PVE::Storage::storage_migrate($self->{storecfg}, $volid, $self->{nodeip}, $sid); | |
3ea94c60 DM |
232 | } |
233 | }; | |
234 | die "Failed to sync data - $@" if $@; | |
235 | } | |
236 | ||
1e3baf05 | 237 | sub phase1 { |
16e903f2 | 238 | my ($self, $vmid) = @_; |
1e3baf05 | 239 | |
16e903f2 | 240 | $self->log('info', "starting migration of VM $vmid to node '$self->{node}' ($self->{nodeip})"); |
1e3baf05 | 241 | |
16e903f2 | 242 | my $conf = $self->{vmconf}; |
1e3baf05 DM |
243 | |
244 | # set migrate lock in config file | |
16e903f2 | 245 | PVE::QemuServer::change_config_nolock($vmid, { lock => 'migrate' }, {}, 1); |
1e3baf05 | 246 | |
16e903f2 | 247 | sync_disks($self, $vmid); |
1ef75254 DM |
248 | |
249 | # move config to remote node | |
16e903f2 DM |
250 | my $conffile = PVE::QemuServer::config_file($vmid); |
251 | my $newconffile = PVE::QemuServer::config_file($vmid, $self->{node}); | |
1e3baf05 | 252 | |
16e903f2 | 253 | die "Failed to move config to node '$self->{node}' - rename failed: $!\n" |
1ef75254 | 254 | if !rename($conffile, $newconffile); |
1e3baf05 DM |
255 | }; |
256 | ||
16e903f2 DM |
257 | sub phase1_cleanup { |
258 | my ($self, $vmid, $err) = @_; | |
259 | ||
260 | $self->log('info', "aborting phase 1 - cleanup resources"); | |
261 | ||
262 | my $unset = { lock => 1 }; | |
263 | eval { PVE::QemuServer::change_config_nolock($vmid, {}, $unset, 1) }; | |
264 | if (my $err = $@) { | |
265 | $self->log('err', $err); | |
266 | } | |
267 | ||
268 | if ($self->{volumes}) { | |
269 | foreach my $volid (@{$self->{volumes}}) { | |
270 | $self->log('err', "found stale volume copy '$volid' on node '$self->{node}'"); | |
271 | # fixme: try to remove ? | |
272 | } | |
273 | } | |
274 | } | |
275 | ||
1e3baf05 | 276 | sub phase2 { |
16e903f2 | 277 | my ($self, $vmid) = @_; |
1e3baf05 | 278 | |
16e903f2 DM |
279 | my $conf = $self->{vmconf}; |
280 | ||
46a84fd4 | 281 | $self->log('info', "starting VM $vmid on remote node '$self->{node}'"); |
1e3baf05 DM |
282 | |
283 | my $rport; | |
284 | ||
19672434 | 285 | ## start on remote node |
16e903f2 DM |
286 | my $cmd = [@{$self->{rem_ssh}}, 'qm', 'start', |
287 | $vmid, '--stateuri', 'tcp', '--skiplock']; | |
1e3baf05 | 288 | |
72afda82 | 289 | PVE::Tools::run_command($cmd, outfunc => sub { |
1e3baf05 DM |
290 | my $line = shift; |
291 | ||
292 | if ($line =~ m/^migration listens on port (\d+)$/) { | |
293 | $rport = $1; | |
294 | } | |
72afda82 | 295 | }, errfunc => sub {}); |
1e3baf05 DM |
296 | |
297 | die "unable to detect remote migration port\n" if !$rport; | |
298 | ||
16e903f2 | 299 | $self->log('info', "starting migration tunnel"); |
1ef75254 | 300 | |
1e3baf05 | 301 | ## create tunnel to remote port |
1ef75254 | 302 | my $lport = PVE::QemuServer::next_migrate_port(); |
16e903f2 | 303 | $self->{tunnel} = $self->fork_tunnel($self->{nodeip}, $lport, $rport); |
1e3baf05 | 304 | |
d68afb26 | 305 | $self->log('info', "starting online/live migration on port $lport"); |
1e3baf05 DM |
306 | # start migration |
307 | ||
308 | my $start = time(); | |
309 | ||
d68afb26 | 310 | my $merr = PVE::QemuServer::vm_monitor_command($vmid, "migrate -d \"tcp:localhost:$lport\"", 1); |
1e3baf05 DM |
311 | |
312 | my $lstat = ''; | |
313 | while (1) { | |
314 | sleep (2); | |
16e903f2 | 315 | my $stat = PVE::QemuServer::vm_monitor_command($vmid, "info migrate", 1); |
1e3baf05 | 316 | if ($stat =~ m/^Migration status: (active|completed|failed|cancelled)$/im) { |
d68afb26 | 317 | $merr = undef; |
1e3baf05 DM |
318 | my $ms = $1; |
319 | ||
320 | if ($stat ne $lstat) { | |
321 | if ($ms eq 'active') { | |
322 | my ($trans, $rem, $total) = (0, 0, 0); | |
323 | $trans = $1 if $stat =~ m/^transferred ram: (\d+) kbytes$/im; | |
324 | $rem = $1 if $stat =~ m/^remaining ram: (\d+) kbytes$/im; | |
325 | $total = $1 if $stat =~ m/^total ram: (\d+) kbytes$/im; | |
326 | ||
16e903f2 DM |
327 | $self->log('info', "migration status: $ms (transferred ${trans}KB, " . |
328 | "remaining ${rem}KB), total ${total}KB)"); | |
1e3baf05 | 329 | } else { |
16e903f2 | 330 | $self->log('info', "migration status: $ms"); |
1e3baf05 DM |
331 | } |
332 | } | |
333 | ||
334 | if ($ms eq 'completed') { | |
335 | my $delay = time() - $start; | |
336 | if ($delay > 0) { | |
337 | my $mbps = sprintf "%.2f", $conf->{memory}/$delay; | |
16e903f2 | 338 | $self->log('info', "migration speed: $mbps MB/s"); |
1e3baf05 DM |
339 | } |
340 | } | |
16e903f2 | 341 | |
1e3baf05 DM |
342 | if ($ms eq 'failed' || $ms eq 'cancelled') { |
343 | die "aborting\n" | |
344 | } | |
345 | ||
346 | last if $ms ne 'active'; | |
347 | } else { | |
d68afb26 | 348 | die $merr if $merr; |
1e3baf05 DM |
349 | die "unable to parse migration status '$stat' - aborting\n"; |
350 | } | |
351 | $lstat = $stat; | |
352 | }; | |
353 | } | |
16e903f2 DM |
354 | |
355 | sub phase3 { | |
356 | my ($self, $vmid) = @_; | |
357 | ||
358 | my $volids = $self->{volumes}; | |
359 | ||
360 | # destroy local copies | |
361 | foreach my $volid (@$volids) { | |
362 | eval { PVE::Storage::vdisk_free($self->{storecfg}, $volid); }; | |
363 | if (my $err = $@) { | |
364 | $self->log('err', "removing local copy of '$volid' failed - $err"); | |
365 | $self->{errors} = 1; | |
366 | last if $err =~ /^interrupted by signal$/; | |
367 | } | |
368 | } | |
369 | ||
370 | if ($self->{tunnel}) { | |
371 | eval { finish_tunnel($self, $self->{tunnel}); }; | |
372 | if (my $err = $@) { | |
373 | $self->log('err', $err); | |
374 | $self->{errors} = 1; | |
375 | } | |
376 | } | |
377 | } | |
378 | ||
379 | sub phase3_cleanup { | |
380 | my ($self, $vmid, $err) = @_; | |
381 | ||
382 | my $conf = $self->{vmconf}; | |
383 | ||
384 | # always stop local VM | |
385 | eval { PVE::QemuServer::vm_stop($self->{storecfg}, $vmid, 1, 1); }; | |
386 | if (my $err = $@) { | |
387 | $self->log('err', "stopping vm failed - $err"); | |
388 | $self->{errors} = 1; | |
389 | } | |
390 | ||
391 | # always deactivate volumes - avoid lvm LVs to be active on several nodes | |
392 | eval { | |
393 | my $vollist = PVE::QemuServer::get_vm_volumes($conf); | |
394 | PVE::Storage::deactivate_volumes($self->{storecfg}, $vollist); | |
395 | }; | |
396 | if (my $err = $@) { | |
397 | $self->log('err', $err); | |
398 | $self->{errors} = 1; | |
399 | } | |
400 | ||
401 | # clear migrate lock | |
402 | my $cmd = [ @{$self->{rem_ssh}}, 'qm', 'unlock', $vmid ]; | |
403 | $self->cmd_logerr($cmd, errmsg => "failed to clear migrate lock"); | |
404 | } | |
405 | ||
406 | sub final_cleanup { | |
407 | my ($self, $vmid) = @_; | |
408 | ||
409 | # nothing to do | |
410 | } | |
411 | ||
412 | 1; |