]> git.proxmox.com Git - pve-container.git/blob - src/PVE/LXC/Migrate.pm
implement lxc restart migration
[pve-container.git] / src / PVE / LXC / Migrate.pm
1 package PVE::LXC::Migrate;
2
3 use strict;
4 use warnings;
5 use PVE::AbstractMigrate;
6 use File::Basename;
7 use File::Copy; # fixme: remove
8 use PVE::Tools;
9 use PVE::INotify;
10 use PVE::Cluster;
11 use PVE::Storage;
12 use PVE::LXC;
13
14 use base qw(PVE::AbstractMigrate);
15
# Run $code (invoked with the remaining arguments) under the configuration
# lock of container $vmid. This simply delegates to
# PVE::LXC::Config->lock_config and hands its result back to the caller.
sub lock_vm {
    my $self = shift;
    my ($vmid, $code, @args) = @_;

    return PVE::LXC::Config->lock_config($vmid, $code, @args);
}
21
# Check that container $vmid can be migrated to $self->{node} and bring it
# into a migratable state.
#
# Reads the 'online', 'restart', 'force' and 'timeout' entries of
# $self->{opts}. As a side effect it stores the storage configuration in
# $self->{storecfg}, the container configuration in $self->{vmconf} and the
# initial running state in $self->{was_running}.
#
# Dies if
#   - the container is running and 'online' is set, or 'restart' is not set,
#   - a non-volume mount point is neither forced nor marked 'shared',
#   - no storage can be determined for a volume mount point,
#   - a storage check, the volume activation, the ssh test command or the
#     shutdown commands fail.
#
# Returns the running flag, which is 0 if the container was not running or
# has been shut down here (restart mode).
sub prepare {
    my ($self, $vmid) = @_;

    my $online = $self->{opts}->{online};
    my $restart = $self->{opts}->{restart};

    $self->{storecfg} = PVE::Storage::config();

    # test if CT exists
    my $conf = $self->{vmconf} = PVE::LXC::Config->load_config($vmid);

    PVE::LXC::Config->check_lock($conf);

    my $running = 0;
    if (PVE::LXC::check_running($vmid)) {
        die "lxc live migration is currently not implemented\n" if $online;
        die "running container can only be migrated in restart mode\n" if !$restart;
        $running = 1;
    }
    $self->{was_running} = $running;

    my $force = $self->{opts}->{force} // 0;
    my $need_activate = [];

    PVE::LXC::Config->foreach_mountpoint($conf, sub {
        my ($ms, $mountpoint) = @_;

        my $volid = $mountpoint->{volume};
        my $type = $mountpoint->{type};

        # skip dev/bind mps when forced / shared
        if ($type ne 'volume') {
            if ($force) {
                warn "-force is deprecated, please use the 'shared' property on individual non-volume mount points instead!\n";
                return;
            }
            if ($mountpoint->{shared}) {
                return;
            } else {
                die "cannot migrate local $type mount point '$ms'\n";
            }
        }

        # Declare first, assign conditionally: combining 'my' with a
        # statement modifier has undefined behaviour in Perl.
        my $storage;
        ($storage) = PVE::Storage::parse_volume_id($volid, 1) if $volid;
        die "can't determine assigned storage for mount point '$ms'\n" if !$storage;

        # check if storage is available on both nodes
        my $scfg = PVE::Storage::storage_check_node($self->{storecfg}, $storage);
        PVE::Storage::storage_check_node($self->{storecfg}, $storage, $self->{node});

        if ($scfg->{shared}) {
            # PVE::Storage::activate_storage checks this for non-shared storages
            my $plugin = PVE::Storage::Plugin->lookup($scfg->{type});
            warn "Used shared storage '$storage' is not online on source node!\n"
                if !$plugin->check_connection($storage, $scfg);
        } else {
            # only activate if not shared
            push @$need_activate, $volid;

            # unless in restart mode because we shut the container down
            # (a running container without restart mode already died above,
            # so this is only a safety net)
            die "unable to migrate local mount point '$volid' while CT is running\n"
                if $running && !$restart;
        }
    });

    PVE::Storage::activate_volumes($self->{storecfg}, $need_activate);

    # todo: test if VM uses local resources

    # test ssh connection
    my $cmd = [ @{$self->{rem_ssh}}, '/bin/true' ];
    eval { $self->cmd_quiet($cmd); };
    die "Can't connect to destination address using public key\n" if $@;

    # in restart mode, we shutdown the container before migrating
    if ($restart && $running) {
        my $timeout = $self->{opts}->{timeout} // 180;

        $self->log('info', "shutdown CT $vmid\n");

        # give the command itself a little more time than lxc-stop's own timeout
        my $stop_cmd = ['lxc-stop', '-n', $vmid, '--timeout', $timeout];
        $self->cmd($stop_cmd, timeout => $timeout + 5);

        # make sure container is stopped
        my $wait_cmd = ['lxc-wait', '-n', $vmid, '-t', 5, '-s', 'STOPPED'];
        $self->cmd($wait_cmd);

        $running = 0;
    }

    return $running;
}
116
# Phase 1 of the migration: copy all local volumes of container $vmid to the
# target node and move the container configuration there.
#
# Steps, in order:
#   1. set the 'migrate' lock in the configuration and write it,
#   2. collect local volumes: volumes found on non-shared storages, volumes
#      referenced by snapshots, and volumes of the current configuration,
#   3. check that every collected volume can be migrated; problems are
#      collected per volume, logged, and lead to a single die afterwards,
#   4. migrate each volume with PVE::Storage::storage_migrate and record it
#      in $self->{volumes},
#   5. unmount everything, deactivate the container's volumes and rename the
#      configuration file to its location for $self->{node}.
#
# Sets $self->{conf_migrated} = 1 once the configuration has been moved.
# Dies on any failure (unsupported volume, storage error, failed rename, or
# if $self->{running} is set, which is not implemented).
sub phase1 {
    my ($self, $vmid) = @_;

    $self->log('info', "starting migration of CT $self->{vmid} to node '$self->{node}' ($self->{nodeip})");

    my $conf = $self->{vmconf};
    $conf->{lock} = 'migrate';
    PVE::LXC::Config->write_config($vmid, $conf);

    if ($self->{running}) {
        $self->log('info', "container is running - using online migration");
    }

    $self->{volumes} = []; # list of already migrated volumes
    my $volhash = {}; # 'config', 'snapshot' or 'storage' for local volumes
    my $volhash_errors = {};
    my $abort = 0;

    # Remember the first error per volume and mark the migration as aborted.
    my $log_error = sub {
        my ($msg, $volid) = @_;

        $volhash_errors->{$volid} = $msg if !defined($volhash_errors->{$volid});
        $abort = 1;
    };

    # Register a volume in $volhash unless it lives on shared storage; dies
    # if the volume cannot be migrated.
    my $test_volid = sub {
        my ($volid, $snapname) = @_;

        return if !$volid;

        my ($sid) = PVE::Storage::parse_volume_id($volid);

        # check if storage is available on both nodes
        my $scfg = PVE::Storage::storage_check_node($self->{storecfg}, $sid);
        PVE::Storage::storage_check_node($self->{storecfg}, $sid, $self->{node});

        if ($scfg->{shared}) {
            $self->log('info', "volume '$volid' is on shared storage '$sid'")
                if !$snapname;
            return;
        }

        $volhash->{$volid} = defined($snapname) ? 'snapshot' : 'config';

        my (undef, $owner) = PVE::Storage::path($self->{storecfg}, $volid);

        if (!$owner || ($owner != $self->{vmid})) {
            # $owner may be undef here; avoid an uninitialized-value warning
            my $owner_desc = $owner // 'unknown';
            die "owned by other guest (owner = $owner_desc)\n";
        }

        if (defined($snapname)) {
            # we cannot migrate snapshots on local storage
            # exceptions: 'zfspool'
            if (($scfg->{type} eq 'zfspool')) {
                return;
            }
            die "non-migratable snapshot exists\n";
        }
    };

    # Mount point callback: checks volume mount points and records errors
    # instead of dying.
    my $test_mp = sub {
        my ($ms, $mountpoint, $snapname) = @_;

        my $volid = $mountpoint->{volume};
        # already checked in prepare
        if ($mountpoint->{type} ne 'volume') {
            $self->log('info', "ignoring shared '$mountpoint->{type}' mount point '$ms' ('$volid')")
                if !$snapname;
            return;
        }

        eval {
            $test_volid->($volid, $snapname);
        };

        $log_error->($@, $volid) if $@;
    };

    # first unused / lost volumes owned by this container
    my @sids = PVE::Storage::storage_ids($self->{storecfg});
    foreach my $storeid (@sids) {
        my $scfg = PVE::Storage::storage_config($self->{storecfg}, $storeid);
        next if $scfg->{shared};
        next if !PVE::Storage::storage_check_enabled($self->{storecfg}, $storeid, undef, 1);

        # get list from PVE::Storage (for unused volumes)
        my $dl = PVE::Storage::vdisk_list($self->{storecfg}, $storeid, $vmid);

        next if @{$dl->{$storeid}} == 0;

        # check if storage is available on target node
        PVE::Storage::storage_check_node($self->{storecfg}, $storeid, $self->{node});

        PVE::Storage::foreach_volid($dl, sub {
            my ($volid) = @_;

            $volhash->{$volid} = 'storage';
        });
    }

    # then all volumes referenced in snapshots
    foreach my $snapname (keys %{$conf->{snapshots}}) {
        # NOTE(review): this passes 0 as $snapname plus a third argument the
        # closure ignores; it is not wrapped in eval, so a failure here dies
        # immediately. Looks like a call written for a three-argument
        # variant - confirm the intended behaviour before changing it.
        $test_volid->($conf->{snapshots}->{$snapname}->{'vmstate'}, 0, undef)
            if defined($conf->{snapshots}->{$snapname}->{'vmstate'});
        PVE::LXC::Config->foreach_mountpoint($conf->{snapshots}->{$snapname}, $test_mp, $snapname);
    }

    # finally all currently used volumes
    PVE::LXC::Config->foreach_mountpoint($conf, $test_mp);

    # additional checks for local storage
    foreach my $volid (keys %$volhash) {
        eval {
            my ($sid) = PVE::Storage::parse_volume_id($volid);
            my $scfg = PVE::Storage::storage_config($self->{storecfg}, $sid);

            my $migratable = ($scfg->{type} eq 'dir') || ($scfg->{type} eq 'zfspool') ||
                ($scfg->{type} eq 'lvmthin') || ($scfg->{type} eq 'lvm');

            die "storage type '$scfg->{type}' not supported\n"
                if !$migratable;

            # image is a linked clone on local storage, so we can't migrate.
            if (my $basename = (PVE::Storage::parse_volname($self->{storecfg}, $volid))[3]) {
                # trailing newline keeps file/line noise out of the log
                die "clone of '$basename'\n";
            }
        };
        $log_error->($@, $volid) if $@;
    }

    foreach my $volid (sort keys %$volhash) {
        if ($volhash->{$volid} eq 'storage') {
            $self->log('info', "found local volume '$volid' (via storage)\n");
        } elsif ($volhash->{$volid} eq 'config') {
            $self->log('info', "found local volume '$volid' (in current VM config)\n");
        } elsif ($volhash->{$volid} eq 'snapshot') {
            $self->log('info', "found local volume '$volid' (referenced by snapshot(s))\n");
        } else {
            $self->log('info', "found local volume '$volid'\n");
        }
    }

    foreach my $volid (sort keys %$volhash_errors) {
        $self->log('warn', "can't migrate local volume '$volid': $volhash_errors->{$volid}");
    }

    if ($abort) {
        die "can't migrate CT - check log\n";
    }

    foreach my $volid (keys %$volhash) {
        my ($sid) = PVE::Storage::parse_volume_id($volid);
        # record the volume before copying, so phase1_cleanup can report it
        # even if the copy itself fails
        push @{$self->{volumes}}, $volid;
        PVE::Storage::storage_migrate($self->{storecfg}, $volid, $self->{nodeip}, $sid);
    }

    my $conffile = PVE::LXC::Config->config_file($vmid);
    my $newconffile = PVE::LXC::Config->config_file($vmid, $self->{node});

    if ($self->{running}) {
        die "implement me";
    }

    # make sure everything on (shared) storage is unmounted
    # Note: we must be 100% sure, else we get data corruption because
    # non-shared file system could be mounted twice (on shared storage)

    PVE::LXC::umount_all($vmid, $self->{storecfg}, $conf);

    # to be sure there are no active volumes
    my $vollist = PVE::LXC::Config->get_vm_volumes($conf);
    PVE::Storage::deactivate_volumes($self->{storecfg}, $vollist);

    # move config
    die "Failed to move config to node '$self->{node}' - rename failed: $!\n"
        if !rename($conffile, $newconffile);

    $self->{conf_migrated} = 1;
}
296
# Invoked when phase 1 is aborted. Logs the abort and reports every volume
# recorded in $self->{volumes} as a stale copy on the target node; nothing is
# actually removed. $vmid and $err are accepted but not used.
sub phase1_cleanup {
    my ($self, $vmid, $err) = @_;

    $self->log('info', "aborting phase 1 - cleanup resources");

    my $copied = $self->{volumes};
    if ($copied) {
        for my $stale (@$copied) {
            $self->log('err', "found stale volume copy '$stale' on node '$self->{node}'");
            # fixme: try to remove ?
        }
    }
}
309
# Phase 3: free the local copies of all volumes listed in $self->{volumes}.
# A failure is logged and flagged in $self->{errors}, after which the loop
# continues with the next volume - unless the error reads
# "interrupted by signal", in which case the loop stops.
sub phase3 {
    my ($self, $vmid) = @_;

    my $local_copies = $self->{volumes};

    # destroy local copies
    for my $volid (@$local_copies) {
        eval { PVE::Storage::vdisk_free($self->{storecfg}, $volid); };
        my $err = $@;
        next if !$err;

        $self->log('err', "removing local copy of '$volid' failed - $err");
        $self->{errors} = 1;
        last if $err =~ /^interrupted by signal$/;
    }
}
325
# Last migration step: clear the migrate lock and, in restart mode, start
# the container again on the target node.
#
# If the configuration was moved ($self->{conf_migrated}), the lock is
# cleared by running 'pct unlock' through $self->{rem_ssh}; otherwise the
# lock is removed from the local configuration, and a write failure is only
# logged. The remote start is issued only when the 'restart' option is set
# and the container was running before the migration.
sub final_cleanup {
    my ($self, $vmid) = @_;

    $self->log('info', "start final cleanup");

    if ($self->{conf_migrated}) {
        my $unlock_cmd = [ @{$self->{rem_ssh}}, 'pct', 'unlock', $vmid ];
        $self->cmd_logerr($unlock_cmd, errmsg => "failed to clear migrate lock");
    } else {
        my $local_conf = $self->{vmconf};
        delete $local_conf->{lock};

        eval { PVE::LXC::Config->write_config($vmid, $local_conf); };
        my $write_err = $@;
        $self->log('err', $write_err) if $write_err;
    }

    # in restart mode, we start the container on the target node
    # after migration
    if ($self->{opts}->{restart} && $self->{was_running}) {
        $self->log('info', "start container on target node");
        my $start_cmd = [ @{$self->{rem_ssh}}, 'pct', 'start', $vmid ];
        $self->cmd($start_cmd);
    }
}
352
353 1;