]> git.proxmox.com Git - pve-container.git/blob - src/PVE/LXC/Migrate.pm
migrate: cleanup replica volume skip condition
[pve-container.git] / src / PVE / LXC / Migrate.pm
1 package PVE::LXC::Migrate;
2
3 use strict;
4 use warnings;
5 use PVE::AbstractMigrate;
6 use File::Basename;
7 use File::Copy; # fixme: remove
8 use PVE::Tools;
9 use PVE::INotify;
10 use PVE::Cluster;
11 use PVE::Storage;
12 use PVE::LXC;
13 use PVE::ReplicationTools;
14
15 use base qw(PVE::AbstractMigrate);
16
# Run $code while holding the configuration lock of container $vmid.
#
# Everything after the invocant is handed unchanged to
# PVE::LXC::Config->lock_config, and whatever that call returns is
# returned to the caller.
sub lock_vm {
    my $self = shift;
    my ($vmid, $code, @param) = @_;

    return PVE::LXC::Config->lock_config($vmid, $code, @param);
}
22
# Check that container $vmid can be migrated and get it ready.
#
# Steps, in order:
#   * load the container config (stored in $self->{vmconf}) and check its lock
#   * refuse online migration; a running container is only accepted in
#     restart mode
#   * walk all mount points: non-volume mount points are skipped when
#     forced or marked 'shared' and rejected otherwise; for volume mount
#     points the storage is checked on the local and the target node
#   * activate all volumes that live on non-shared storage
#   * test the ssh connection to the target
#   * in restart mode, shut down a running container
#
# Side effects: sets $self->{storecfg}, $self->{vmconf} and
# $self->{was_running}.
#
# Returns the running state after preparation (0 once the container has
# been shut down for restart mode). Dies with a message on any failed check.
sub prepare {
    my ($self, $vmid) = @_;

    my $online = $self->{opts}->{online};
    my $restart = $self->{opts}->{restart};

    $self->{storecfg} = PVE::Storage::config();

    # test if CT exists
    my $conf = $self->{vmconf} = PVE::LXC::Config->load_config($vmid);

    PVE::LXC::Config->check_lock($conf);

    my $running = 0;
    if (PVE::LXC::check_running($vmid)) {
        die "lxc live migration is currently not implemented\n" if $online;
        die "running container can only be migrated in restart mode\n" if !$restart;
        $running = 1;
    }
    $self->{was_running} = $running;

    my $force = $self->{opts}->{force} // 0;
    my $need_activate = [];

    PVE::LXC::Config->foreach_mountpoint($conf, sub {
        my ($ms, $mountpoint) = @_;

        my $volid = $mountpoint->{volume};
        my $type = $mountpoint->{type};

        # skip dev/bind mps when forced / shared
        if ($type ne 'volume') {
            if ($force) {
                warn "-force is deprecated, please use the 'shared' property on individual non-volume mount points instead!\n";
                return;
            }
            if ($mountpoint->{shared}) {
                return;
            } else {
                die "cannot migrate local $type mount point '$ms'\n";
            }
        }

        # Note: no 'my ... if COND' here - a declaration with a statement
        # modifier has undefined behaviour, so use an explicit conditional.
        my ($storage) = $volid ? PVE::Storage::parse_volume_id($volid, 1) : ();
        die "can't determine assigned storage for mount point '$ms'\n" if !$storage;

        # check if storage is available on both nodes
        my $scfg = PVE::Storage::storage_check_node($self->{storecfg}, $storage);
        PVE::Storage::storage_check_node($self->{storecfg}, $storage, $self->{node});

        if ($scfg->{shared}) {
            # PVE::Storage::activate_storage checks this for non-shared storages
            my $plugin = PVE::Storage::Plugin->lookup($scfg->{type});
            warn "Used shared storage '$storage' is not online on source node!\n"
                if !$plugin->check_connection($storage, $scfg);
        } else {
            # only activate if not shared
            push @$need_activate, $volid;

            # unless in restart mode because we shut the container down
            # (defensive: the running check above already rejects this case)
            die "unable to migrate local mount point '$volid' while CT is running\n"
                if $running && !$restart;
        }
    });

    PVE::Storage::activate_volumes($self->{storecfg}, $need_activate);

    # todo: test if VM uses local resources

    # test ssh connection
    my $cmd = [ @{$self->{rem_ssh}}, '/bin/true' ];
    eval { $self->cmd_quiet($cmd); };
    die "Can't connect to destination address using public key\n" if $@;

    # in restart mode, we shutdown the container before migrating
    if ($restart && $running) {
        my $timeout = $self->{opts}->{timeout} // 180;

        $self->log('info', "shutdown CT $vmid\n");

        # give the command itself a little more time than lxc-stop's own timeout
        my $stop_cmd = ['lxc-stop', '-n', $vmid, '--timeout', $timeout];
        $self->cmd($stop_cmd, timeout => $timeout + 5);

        # make sure container is stopped
        my $wait_cmd = ['lxc-wait', '-n', $vmid, '-t', 5, '-s', 'STOPPED'];
        $self->cmd($wait_cmd);

        $running = 0;
    }

    return $running;
}
117
# Phase 1 of the migration: copy local volumes and move the config.
#
# Steps, in order:
#   * set the 'migrate' lock in the container config and write it
#   * collect all local (non-shared) volumes: those found on enabled local
#     storages for this $vmid, those referenced by snapshots and those
#     referenced by the current config
#   * run per-volume checks; every failure is recorded per volume and
#     reported together before aborting
#   * copy each collected volume to the target with
#     PVE::Storage::storage_migrate, recording it in $self->{volumes}
#   * adjust the replica settings if the container uses replication
#   * unmount and deactivate all volumes, then rename the config file to
#     the target node's location and set $self->{conf_migrated}
#
# Dies if any volume check failed, if the (unimplemented) running case is
# hit, or if the config rename fails.
sub phase1 {
    my ($self, $vmid) = @_;

    $self->log('info', "starting migration of CT $self->{vmid} to node '$self->{node}' ($self->{nodeip})");

    my $conf = $self->{vmconf};
    $conf->{lock} = 'migrate';
    PVE::LXC::Config->write_config($vmid, $conf);

    if ($self->{running}) {
        $self->log('info', "container is running - using online migration");
    }

    $self->{volumes} = []; # list of already migrated volumes
    my $volhash = {}; # 'config', 'snapshot' or 'storage' for local volumes
    my $volhash_errors = {};
    my $abort = 0;

    # Remember the first error seen for a volume and flag the migration
    # for abort.
    my $log_error = sub {
        my ($msg, $volid) = @_;

        $volhash_errors->{$volid} = $msg if !defined($volhash_errors->{$volid});
        $abort = 1;
    };

    # Classify a single volume. Volumes on shared storage are only logged;
    # local ones are entered into $volhash. Dies on problems.
    my $test_volid = sub {
        my ($volid, $snapname) = @_;

        return if !$volid;

        my ($sid) = PVE::Storage::parse_volume_id($volid);

        # check if storage is available on both nodes
        my $scfg = PVE::Storage::storage_check_node($self->{storecfg}, $sid);
        PVE::Storage::storage_check_node($self->{storecfg}, $sid, $self->{node});

        if ($scfg->{shared}) {
            $self->log('info', "volume '$volid' is on shared storage '$sid'")
                if !$snapname;
            return;
        }

        $volhash->{$volid} = defined($snapname) ? 'snapshot' : 'config';

        my (undef, $owner) = PVE::Storage::path($self->{storecfg}, $volid);

        # $owner may be undefined here, so do not interpolate it directly
        my $owner_text = $owner || 'unknown';
        die "owned by other guest (owner = $owner_text)\n"
            if !$owner || ($owner != $self->{vmid});

        if (defined($snapname)) {
            # we cannot migrate snapshots on local storage
            # exceptions: 'zfspool'
            if ($scfg->{type} eq 'zfspool') {
                return;
            }
            die "non-migratable snapshot exists\n";
        }
    };

    # foreach_mountpoint callback: check the volume behind a mount point
    # and record any failure instead of dying.
    my $test_mp = sub {
        my ($ms, $mountpoint, $snapname) = @_;

        my $volid = $mountpoint->{volume};
        # already checked in prepare
        if ($mountpoint->{type} ne 'volume') {
            $self->log('info', "ignoring shared '$mountpoint->{type}' mount point '$ms' ('$volid')")
                if !$snapname;
            return;
        }

        eval { $test_volid->($volid, $snapname); };
        $log_error->($@, $volid) if $@;
    };

    # first unused / lost volumes owned by this container
    my @sids = PVE::Storage::storage_ids($self->{storecfg});
    foreach my $storeid (@sids) {
        my $scfg = PVE::Storage::storage_config($self->{storecfg}, $storeid);
        next if $scfg->{shared};
        next if !PVE::Storage::storage_check_enabled($self->{storecfg}, $storeid, undef, 1);

        # get list from PVE::Storage (for unused volumes)
        my $dl = PVE::Storage::vdisk_list($self->{storecfg}, $storeid, $vmid);

        next if @{$dl->{$storeid}} == 0;

        # check if storage is available on target node
        PVE::Storage::storage_check_node($self->{storecfg}, $storeid, $self->{node});

        PVE::Storage::foreach_volid($dl, sub {
            my ($volid) = @_;

            $volhash->{$volid} = 'storage';
        });
    }

    # then all volumes referenced in snapshots
    foreach my $snapname (keys %{$conf->{snapshots}}) {
        my $snapconf = $conf->{snapshots}->{$snapname};

        if (defined(my $vmstate = $snapconf->{'vmstate'})) {
            # Report failures through $log_error like every other volume
            # check, instead of dying outside the collected error report.
            # NOTE(review): the second argument stays the literal 0 the
            # original call passed; being defined, it makes the volume
            # count as snapshot-referenced - confirm this is intended.
            eval { $test_volid->($vmstate, 0); };
            $log_error->($@, $vmstate) if $@;
        }

        PVE::LXC::Config->foreach_mountpoint($snapconf, $test_mp, $snapname);
    }

    # finally all currently used volumes
    PVE::LXC::Config->foreach_mountpoint($conf, $test_mp);

    # additional checks for local storage
    foreach my $volid (keys %$volhash) {
        eval {
            my ($sid) = PVE::Storage::parse_volume_id($volid);
            my $scfg = PVE::Storage::storage_config($self->{storecfg}, $sid);

            my $migratable = ($scfg->{type} eq 'dir') || ($scfg->{type} eq 'zfspool') ||
                ($scfg->{type} eq 'lvmthin') || ($scfg->{type} eq 'lvm');

            die "storage type '$scfg->{type}' not supported\n"
                if !$migratable;

            # image is a linked clone on local storage, so we can't migrate.
            if (my $basename = (PVE::Storage::parse_volname($self->{storecfg}, $volid))[3]) {
                die "clone of '$basename'\n";
            }
        };
        $log_error->($@, $volid) if $@;
    }

    foreach my $volid (sort keys %$volhash) {
        if ($volhash->{$volid} eq 'storage') {
            $self->log('info', "found local volume '$volid' (via storage)\n");
        } elsif ($volhash->{$volid} eq 'config') {
            $self->log('info', "found local volume '$volid' (in current VM config)\n");
        } elsif ($volhash->{$volid} eq 'snapshot') {
            $self->log('info', "found local volume '$volid' (referenced by snapshot(s))\n");
        } else {
            $self->log('info', "found local volume '$volid'\n");
        }
    }

    foreach my $volid (sort keys %$volhash_errors) {
        $self->log('warn', "can't migrate local volume '$volid': $volhash_errors->{$volid}");
    }

    if ($abort) {
        die "can't migrate CT - check log\n";
    }

    foreach my $volid (keys %$volhash) {
        my ($sid) = PVE::Storage::parse_volume_id($volid);
        # record the volume before copying, so phase1_cleanup can report it
        # if the copy fails
        push @{$self->{volumes}}, $volid;
        PVE::Storage::storage_migrate($self->{storecfg}, $volid, $self->{nodeip}, $sid);
    }

    # set new replica_target if we migrate to replica target.
    if ($conf->{replica}) {
        $self->log('info', "change replica target to Node: $self->{opts}->{node}");
        if ($conf->{replica_target} eq $self->{node}) {
            $conf->{replica_target} = $self->{opts}->{node};
        }

        PVE::ReplicationTools::job_remove($vmid);
        PVE::LXC::Config->write_config($vmid, $conf);
    }

    my $conffile = PVE::LXC::Config->config_file($vmid);
    my $newconffile = PVE::LXC::Config->config_file($vmid, $self->{node});

    if ($self->{running}) {
        die "implement me";
    }

    # make sure everything on (shared) storage is unmounted
    # Note: we must be 100% sure, else we get data corruption because
    # non-shared file system could be mounted twice (on shared storage)

    PVE::LXC::umount_all($vmid, $self->{storecfg}, $conf);

    # to be sure there are no active volumes
    my $vollist = PVE::LXC::Config->get_vm_volumes($conf);
    PVE::Storage::deactivate_volumes($self->{storecfg}, $vollist);

    # move config
    die "Failed to move config to node '$self->{node}' - rename failed: $!\n"
        if !rename($conffile, $newconffile);

    $self->{conf_migrated} = 1;
}
308
# Called when phase 1 is aborted.
#
# Nothing is removed here: every volume recorded in $self->{volumes} is
# only reported as a stale copy on the target node.
sub phase1_cleanup {
    my ($self, $vmid, $err) = @_;

    $self->log('info', "aborting phase 1 - cleanup resources");

    # fixme: try to remove ?
    my @stale_volids = @{ $self->{volumes} || [] };
    for my $volid (@stale_volids) {
        $self->log('err', "found stale volume copy '$volid' on node '$self->{node}'");
    }
}
321
# Phase 3 of the migration: remove the local copies of migrated volumes.
#
# Every volume in $self->{volumes} is freed with PVE::Storage::vdisk_free,
# except replicated volumes when the replica target is the node named in
# $self->{opts}->{node}; those are kept.
#
# Failures are logged and flagged in $self->{errors} without aborting the
# loop, unless the error reads "interrupted by signal".
sub phase3 {
    my ($self, $vmid) = @_;

    my $conf = $self->{vmconf};
    my $volids = $self->{volumes};

    # Only query the replicated disks when replication is configured.
    # (Deliberately not 'my ... if COND': a declaration with a statement
    # modifier has undefined behaviour.)
    my $synced_volumes = {};
    if ($conf->{replica}) {
        $synced_volumes = PVE::ReplicationTools::get_syncable_guestdisks($conf, 'lxc');
    }

    # destroy local copies
    foreach my $volid (@$volids) {
        # do not destroy if new target is local_host
        next if $conf->{replica} &&
            defined($synced_volumes->{$volid}) &&
            $conf->{replica_target} eq $self->{opts}->{node};

        eval { PVE::Storage::vdisk_free($self->{storecfg}, $volid); };
        if (my $err = $@) {
            $self->log('err', "removing local copy of '$volid' failed - $err");
            $self->{errors} = 1;
            last if $err =~ /^interrupted by signal$/;
        }
    }
}
345
# Last step of the migration, run whether or not the config was moved.
#
#   * config already moved: run 'pct unlock' on the target over ssh
#     (a failure is only logged by cmd_logerr's errmsg)
#   * config still local: drop the lock from the config and write it back,
#     logging any write error
#   * replication configured: run 'pct set <vmid> --replica' on the target
#   * restart mode and the container was running: start it on the target
sub final_cleanup {
    my ($self, $vmid) = @_;

    $self->log('info', "start final cleanup");

    # Build a command that runs on the target node. The ssh prefix is only
    # looked up when a remote command is actually needed.
    my $on_target = sub {
        my (@remote_args) = @_;
        return [ @{$self->{rem_ssh}}, @remote_args ];
    };

    if ($self->{conf_migrated}) {
        my $unlock = $on_target->('pct', 'unlock', $vmid);
        $self->cmd_logerr($unlock, errmsg => "failed to clear migrate lock");
    } else {
        my $local_conf = $self->{vmconf};
        delete $local_conf->{lock};

        eval { PVE::LXC::Config->write_config($vmid, $local_conf); };
        my $write_err = $@;
        $self->log('err', $write_err) if $write_err;
    }

    if ($self->{vmconf}->{replica}) {
        my $enable_replica = $on_target->('pct', 'set', $vmid, '--replica');
        $self->cmd_logerr($enable_replica, errmsg => "failed to activate replica");
    }

    # in restart mode, we start the container on the target node
    # after migration
    my $start_on_target = $self->{opts}->{restart} && $self->{was_running};
    if ($start_on_target) {
        $self->log('info', "start container on target node");
        $self->cmd($on_target->('pct', 'start', $vmid));
    }
}
377
378 1;