X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=src%2FPVE%2FLXC%2FMigrate.pm;h=35455e16fcfd273fb59b8f08df5dd023c3a248ef;hb=800f454dd328dd01113f0f5daa16bd2ce9b36d6a;hp=73af6d4bf9134fd3b1b181e77b01e1d45dd67468;hpb=235dbdf336f46ba16b8b8f62c55a161716575002;p=pve-container.git diff --git a/src/PVE/LXC/Migrate.pm b/src/PVE/LXC/Migrate.pm index 73af6d4..35455e1 100644 --- a/src/PVE/LXC/Migrate.pm +++ b/src/PVE/LXC/Migrate.pm @@ -2,17 +2,27 @@ package PVE::LXC::Migrate; use strict; use warnings; -use PVE::AbstractMigrate; + use File::Basename; use File::Copy; # fixme: remove -use PVE::Tools; -use PVE::INotify; + use PVE::Cluster; +use PVE::INotify; +use PVE::Replication; +use PVE::ReplicationConfig; +use PVE::ReplicationState; use PVE::Storage; +use PVE::Tools; + +use PVE::LXC::Config; use PVE::LXC; +use PVE::AbstractMigrate; use base qw(PVE::AbstractMigrate); +# compared against remote end's minimum version +our $WS_TUNNEL_VERSION = 2; + sub lock_vm { my ($self, $vmid, $code, @param) = @_; @@ -23,6 +33,8 @@ sub prepare { my ($self, $vmid) = @_; my $online = $self->{opts}->{online}; + my $restart= $self->{opts}->{restart}; + my $remote = $self->{opts}->{remote}; $self->{storecfg} = PVE::Storage::config(); @@ -33,27 +45,21 @@ sub prepare { my $running = 0; if (PVE::LXC::check_running($vmid)) { - die "lxc live migration is currently not implemented\n"; - - die "can't migrate running container without --online\n" if !$online; + die "lxc live migration is currently not implemented\n" if $online; + die "running container can only be migrated in restart mode" if !$restart; $running = 1; } + $self->{was_running} = $running; - my $force = $self->{opts}->{force} // 0; - my $need_activate = []; - - PVE::LXC::Config->foreach_mountpoint($conf, sub { + my $storages = {}; + PVE::LXC::Config->foreach_volume_full($conf, { include_unused => 1 }, sub { my ($ms, $mountpoint) = @_; my $volid = $mountpoint->{volume}; my $type = $mountpoint->{type}; - # skip dev/bind mps when forced / shared + # skip dev/bind mps when shared if ($type ne 'volume') { - if ($force) { - warn "-force is deprecated, please use the 'shared' property on individual non-volume mount points instead!\n"; - return; - } if ($mountpoint->{shared}) { return; } else { @@ -65,33 +71,94 @@ sub prepare { die "can't determine assigned storage for mount point '$ms'\n" if !$storage; # check if storage is available on both nodes - my $scfg = PVE::Storage::storage_check_node($self->{storecfg}, $storage); - PVE::Storage::storage_check_node($self->{storecfg}, $storage, $self->{node}); + my $scfg = PVE::Storage::storage_check_enabled($self->{storecfg}, $storage); + + my $targetsid = $storage; + die "content type 'rootdir' is not available on storage '$storage'\n" + if !$scfg->{content}->{rootdir}; - if ($scfg->{shared}) { + if ($scfg->{shared} && !$remote) { # PVE::Storage::activate_storage checks this for non-shared storages my $plugin = PVE::Storage::Plugin->lookup($scfg->{type}); warn "Used shared storage '$storage' is not online on source node!\n" if !$plugin->check_connection($storage, $scfg); } else { - # only activate if not shared - push @$need_activate, $volid; - + # unless in restart mode because we shut the container down die "unable to migrate local mount point '$volid' while CT is running" - if $running; + if $running && !$restart; + + $targetsid = PVE::JSONSchema::map_id($self->{opts}->{storagemap}, $storage); } - }); + if (!$remote) { + my $target_scfg = PVE::Storage::storage_check_enabled($self->{storecfg}, $targetsid, $self->{node}); - PVE::Storage::activate_volumes($self->{storecfg}, $need_activate); + die "$volid: content type 'rootdir' is not available on storage '$targetsid'\n" + if !$target_scfg->{content}->{rootdir}; + } + + $storages->{$targetsid} = 1; + }); # todo: test if VM uses local resources - # test ssh connection - my $cmd = [ @{$self->{rem_ssh}}, '/bin/true' ]; - eval { $self->cmd_quiet($cmd); }; - die "Can't connect to destination address using public key\n" if $@; + if ($remote) { + # test & establish websocket connection + my $bridges = map_bridges($conf, $self->{opts}->{bridgemap}, 1); + + my $remote = $self->{opts}->{remote}; + my $conn = $remote->{conn}; + + my $log = sub { + my ($level, $msg) = @_; + $self->log($level, $msg); + }; + + my $websocket_url = "https://$conn->{host}:$conn->{port}/api2/json/nodes/$self->{node}/lxc/$remote->{vmid}/mtunnelwebsocket"; + my $url = "/nodes/$self->{node}/lxc/$remote->{vmid}/mtunnel"; + + my $tunnel_params = { + url => $websocket_url, + }; + + my $storage_list = join(',', keys %$storages); + my $bridge_list = join(',', keys %$bridges); + + my $req_params = { + storages => $storage_list, + bridges => $bridge_list, + }; + + my $tunnel = PVE::Tunnel::fork_websocket_tunnel($conn, $url, $req_params, $tunnel_params, $log); + my $min_version = $tunnel->{version} - $tunnel->{age}; + $self->log('info', "local WS tunnel version: $WS_TUNNEL_VERSION"); + $self->log('info', "remote WS tunnel version: $tunnel->{version}"); + $self->log('info', "minimum required WS tunnel version: $min_version"); + die "Remote tunnel endpoint not compatible, upgrade required\n" + if $WS_TUNNEL_VERSION < $min_version; + die "Remote tunnel endpoint too old, upgrade required\n" + if $WS_TUNNEL_VERSION > $tunnel->{version}; + + $self->log('info', "websocket tunnel started\n"); + $self->{tunnel} = $tunnel; + } else { + # test ssh connection + my $cmd = [ @{$self->{rem_ssh}}, '/bin/true' ]; + eval { $self->cmd_quiet($cmd); }; + die "Can't connect to destination address using public key\n" if $@; + } + + # in restart mode, we shutdown the container before migrating + if ($restart && $running) { + my $timeout = $self->{opts}->{timeout} // 180; + + $self->log('info', "shutdown CT $vmid\n"); + + PVE::LXC::vm_stop($vmid, 0, $timeout); + + $running = 0; + } return $running; } @@ -99,6 +166,8 @@ sub prepare { sub phase1 { my ($self, $vmid) = @_; + my $remote = $self->{opts}->{remote}; + $self->log('info', "starting migration of CT $self->{vmid} to node '$self->{node}' ($self->{nodeip})"); my $conf = $self->{vmconf}; @@ -128,17 +197,28 @@ sub phase1 { my ($sid, $volname) = PVE::Storage::parse_volume_id($volid); - # check if storage is available on both nodes - my $scfg = PVE::Storage::storage_check_node($self->{storecfg}, $sid); - PVE::Storage::storage_check_node($self->{storecfg}, $sid, $self->{node}); + # check if storage is available on source node + my $scfg = PVE::Storage::storage_check_enabled($self->{storecfg}, $sid); + + my $targetsid = $sid; - if ($scfg->{shared}) { + if ($scfg->{shared} && !$remote) { $self->log('info', "volume '$volid' is on shared storage '$sid'") if !$snapname; return; + } else { + $targetsid = PVE::JSONSchema::map_id($self->{opts}->{storagemap}, $sid); } - $volhash->{$volid} = defined($snapname) ? 'snapshot' : 'config'; + PVE::Storage::storage_check_enabled($self->{storecfg}, $targetsid, $self->{node}) + if !$remote; + + my $bwlimit = $self->get_bwlimit($sid, $targetsid); + + $volhash->{$volid}->{ref} = defined($snapname) ? 'snapshot' : 'config'; + $volhash->{$volid}->{snapshots} = 1 if defined($snapname); + $volhash->{$volid}->{targetsid} = $targetsid; + $volhash->{$volid}->{bwlimit} = $bwlimit; my ($path, $owner) = PVE::Storage::path($self->{storecfg}, $volid); @@ -147,8 +227,8 @@ sub phase1 { if (defined($snapname)) { # we cannot migrate shapshots on local storage - # exceptions: 'zfspool' - if (($scfg->{type} eq 'zfspool')) { + # exceptions: 'zfspool', 'btrfs' + if ($scfg->{type} eq 'zfspool' || $scfg->{type} eq 'btrfs') { return; } die "non-migratable snapshot exists\n"; @@ -168,6 +248,9 @@ sub phase1 { eval { &$test_volid($volid, $snapname); + + die "remote migration with snapshots not supported yet\n" + if $remote && $snapname; }; &$log_error($@, $volid) if $@; @@ -177,21 +260,28 @@ sub phase1 { my @sids = PVE::Storage::storage_ids($self->{storecfg}); foreach my $storeid (@sids) { my $scfg = PVE::Storage::storage_config($self->{storecfg}, $storeid); - next if $scfg->{shared}; + next if $scfg->{shared} && !$remote; next if !PVE::Storage::storage_check_enabled($self->{storecfg}, $storeid, undef, 1); - # get list from PVE::Storage (for unused volumes) - my $dl = PVE::Storage::vdisk_list($self->{storecfg}, $storeid, $vmid); + # get list from PVE::Storage (for unreferenced volumes) + my $dl = PVE::Storage::vdisk_list($self->{storecfg}, $storeid, $vmid, undef, 'rootdir'); next if @{$dl->{$storeid}} == 0; # check if storage is available on target node - PVE::Storage::storage_check_node($self->{storecfg}, $storeid, $self->{node}); + my $targetsid = PVE::JSONSchema::map_id($self->{opts}->{storagemap}, $storeid); + if (!$remote) { + my $target_scfg = PVE::Storage::storage_check_enabled($self->{storecfg}, $targetsid, $self->{node}); + + die "content type 'rootdir' is not available on storage '$targetsid'\n" + if !$target_scfg->{content}->{rootdir}; + } PVE::Storage::foreach_volid($dl, sub { my ($volid, $sid, $volname) = @_; - $volhash->{$volid} = 'storage'; + $volhash->{$volid}->{ref} = 'storage'; + $volhash->{$volid}->{targetsid} = $targetsid; }); } @@ -199,12 +289,11 @@ sub phase1 { foreach my $snapname (keys %{$conf->{snapshots}}) { &$test_volid($conf->{snapshots}->{$snapname}->{'vmstate'}, 0, undef) if defined($conf->{snapshots}->{$snapname}->{'vmstate'}); - PVE::LXC::Config->foreach_mountpoint($conf->{snapshots}->{$snapname}, $test_mp, $snapname); + PVE::LXC::Config->foreach_volume($conf->{snapshots}->{$snapname}, $test_mp, $snapname); } - # finally all currently used volumes - PVE::LXC::Config->foreach_mountpoint($conf, $test_mp); - + # finally all current volumes + PVE::LXC::Config->foreach_volume_full($conf, { include_unused => 1 }, $test_mp); # additional checks for local storage foreach my $volid (keys %$volhash) { @@ -212,11 +301,21 @@ sub phase1 { my ($sid, $volname) = PVE::Storage::parse_volume_id($volid); my $scfg = PVE::Storage::storage_config($self->{storecfg}, $sid); - my $migratable = ($scfg->{type} eq 'dir') || ($scfg->{type} eq 'zfspool') || - ($scfg->{type} eq 'lvmthin') || ($scfg->{type} eq 'lvm'); + # TODO move to storage plugin layer? + my $migratable_storages = [ + 'dir', + 'zfspool', + 'lvmthin', + 'lvm', + 'btrfs', + ]; + if ($remote) { + push @$migratable_storages, 'cifs'; + push @$migratable_storages, 'nfs'; + } die "storage type '$scfg->{type}' not supported\n" - if !$migratable; + if !grep { $_ eq $scfg->{type} } @$migratable_storages; # image is a linked clone on local storage, se we can't migrate. if (my $basename = (PVE::Storage::parse_volname($self->{storecfg}, $volid))[3]) { @@ -227,11 +326,12 @@ sub phase1 { } foreach my $volid (sort keys %$volhash) { - if ($volhash->{$volid} eq 'storage') { + my $ref = $volhash->{$volid}->{ref}; + if ($ref eq 'storage') { $self->log('info', "found local volume '$volid' (via storage)\n"); - } elsif ($volhash->{$volid} eq 'config') { + } elsif ($ref eq 'config') { $self->log('info', "found local volume '$volid' (in current VM config)\n"); - } elsif ($volhash->{$volid} eq 'snapshot') { + } elsif ($ref eq 'snapshot') { $self->log('info', "found local volume '$volid' (referenced by snapshot(s))\n"); } else { $self->log('info', "found local volume '$volid'\n"); @@ -246,14 +346,80 @@ sub phase1 { die "can't migrate CT - check log\n"; } + my $rep_volumes; + + my $rep_cfg = PVE::ReplicationConfig->new(); + + if ($remote) { + die "cannot remote-migrate replicated VM\n" + if $rep_cfg->check_for_existing_jobs($vmid, 1); + } elsif (my $jobcfg = $rep_cfg->find_local_replication_job($vmid, $self->{node})) { + die "can't live migrate VM with replicated volumes\n" if $self->{running}; + my $start_time = time(); + my $logfunc = sub { my ($msg) = @_; $self->log('info', $msg); }; + $rep_volumes = PVE::Replication::run_replication( + 'PVE::LXC::Config', $jobcfg, $start_time, $start_time, $logfunc); + } + + my $opts = $self->{opts}; foreach my $volid (keys %$volhash) { - my ($sid, $volname) = PVE::Storage::parse_volume_id($volid); + next if $rep_volumes->{$volid}; push @{$self->{volumes}}, $volid; - PVE::Storage::storage_migrate($self->{storecfg}, $volid, $self->{nodeip}, $sid); - } - my $conffile = PVE::LXC::Config->config_file($vmid); - my $newconffile = PVE::LXC::Config->config_file($vmid, $self->{node}); + # JSONSchema and get_bandwidth_limit use kbps - storage_migrate bps + my $bwlimit = $volhash->{$volid}->{bwlimit}; + $bwlimit = $bwlimit * 1024 if defined($bwlimit); + + my $targetsid = $volhash->{$volid}->{targetsid}; + + my $new_volid = eval { + if ($remote) { + my $log = sub { + my ($level, $msg) = @_; + $self->log($level, $msg); + }; + + return PVE::StorageTunnel::storage_migrate( + $self->{tunnel}, + $self->{storecfg}, + $volid, + $self->{vmid}, + $remote->{vmid}, + $volhash->{$volid}, + $log, + ); + } else { + my $storage_migrate_opts = { + 'ratelimit_bps' => $bwlimit, + 'insecure' => $opts->{migration_type} eq 'insecure', + 'with_snapshots' => $volhash->{$volid}->{snapshots}, + 'allow_rename' => 1, + }; + + my $logfunc = sub { $self->log('info', $_[0]); }; + return PVE::Storage::storage_migrate( + $self->{storecfg}, + $volid, + $self->{ssh_info}, + $targetsid, + $storage_migrate_opts, + $logfunc, + ); + } + }; + + if (my $err = $@) { + die "storage migration for '$volid' to storage '$targetsid' failed - $err\n"; + } + + $self->{volume_map}->{$volid} = $new_volid; + $self->log('info', "volume '$volid' is '$new_volid' on the target\n"); + + eval { PVE::Storage::deactivate_volumes($self->{storecfg}, [$volid]); }; + if (my $err = $@) { + $self->log('warn', $err); + } + } if ($self->{running}) { die "implement me"; @@ -269,10 +435,37 @@ sub phase1 { my $vollist = PVE::LXC::Config->get_vm_volumes($conf); PVE::Storage::deactivate_volumes($self->{storecfg}, $vollist); - # move config - die "Failed to move config to node '$self->{node}' - rename failed: $!\n" - if !rename($conffile, $newconffile); + if ($remote) { + my $remote_conf = PVE::LXC::Config->load_config($vmid); + PVE::LXC::Config->update_volume_ids($remote_conf, $self->{volume_map}); + + my $bridges = map_bridges($remote_conf, $self->{opts}->{bridgemap}); + for my $target (keys $bridges->%*) { + for my $nic (keys $bridges->{$target}->%*) { + $self->log('info', "mapped: $nic from $bridges->{$target}->{$nic} to $target"); + } + } + my $conf_str = PVE::LXC::Config::write_pct_config("remote", $remote_conf); + + # TODO expose in PVE::Firewall? + my $vm_fw_conf_path = "/etc/pve/firewall/$vmid.fw"; + my $fw_conf_str; + $fw_conf_str = PVE::Tools::file_get_contents($vm_fw_conf_path) + if -e $vm_fw_conf_path; + my $params = { + conf => $conf_str, + 'firewall-config' => $fw_conf_str, + }; + PVE::Tunnel::write_tunnel($self->{tunnel}, 10, 'config', $params); + } else { + # transfer replication state before moving config + $self->transfer_replication_state() if $rep_volumes; + PVE::LXC::Config->update_volume_ids($conf, $self->{volume_map}); + PVE::LXC::Config->write_config($vmid, $conf); + PVE::LXC::Config->move_config_to_node($vmid, $self->{node}); + $self->switch_replication_job_target() if $rep_volumes; + } $self->{conf_migrated} = 1; } @@ -283,10 +476,19 @@ sub phase1_cleanup { if ($self->{volumes}) { foreach my $volid (@{$self->{volumes}}) { + if (my $mapped_volume = $self->{volume_map}->{$volid}) { + $volid = $mapped_volume; + } $self->log('err', "found stale volume copy '$volid' on node '$self->{node}'"); # fixme: try to remove ? } } + + if ($self->{opts}->{remote}) { + # cleans up remote volumes + PVE::Tunnel::finish_tunnel($self->{tunnel}, 1); + delete $self->{tunnel}; + } } sub phase3 { @@ -294,6 +496,9 @@ sub phase3 { my $volids = $self->{volumes}; + # handled below in final_cleanup + return if $self->{opts}->{remote}; + # destroy local copies foreach my $volid (@$volids) { eval { PVE::Storage::vdisk_free($self->{storecfg}, $volid); }; @@ -311,17 +516,69 @@ sub final_cleanup { $self->log('info', "start final cleanup"); if (!$self->{conf_migrated}) { - my $conf = $self->{vmconf}; - delete $conf->{lock}; - - eval { PVE::LXC::Config->write_config($vmid, $conf); }; + eval { PVE::LXC::Config->remove_lock($vmid, 'migrate'); }; if (my $err = $@) { $self->log('err', $err); } + # in restart mode, we start the container on the source node on migration error + if ($self->{opts}->{restart} && $self->{was_running}) { + $self->log('info', "start container on source node"); + my $skiplock = 1; + PVE::LXC::vm_start($vmid, $self->{vmconf}, $skiplock); + } + } elsif ($self->{opts}->{remote}) { + eval { PVE::Tunnel::write_tunnel($self->{tunnel}, 10, 'unlock') }; + $self->log('err', "Failed to clear migrate lock - $@\n") if $@; + + if ($self->{opts}->{restart} && $self->{was_running}) { + $self->log('info', "start container on target node"); + PVE::Tunnel::write_tunnel($self->{tunnel}, 60, 'start'); + } + if ($self->{opts}->{delete}) { + PVE::LXC::destroy_lxc_container( + PVE::Storage::config(), + $vmid, + PVE::LXC::Config->load_config($vmid), + undef, + 0, + ); + } + PVE::Tunnel::finish_tunnel($self->{tunnel}); } else { my $cmd = [ @{$self->{rem_ssh}}, 'pct', 'unlock', $vmid ]; - $self->cmd_logerr($cmd, errmsg => "failed to clear migrate lock"); + $self->cmd_logerr($cmd, errmsg => "failed to clear migrate lock"); + + # in restart mode, we start the container on the target node after migration + if ($self->{opts}->{restart} && $self->{was_running}) { + $self->log('info', "start container on target node"); + my $cmd = [ @{$self->{rem_ssh}}, 'pct', 'start', $vmid]; + $self->cmd($cmd); + } } } +sub map_bridges { + my ($conf, $map, $scan_only) = @_; + + my $bridges = {}; + + foreach my $opt (keys %$conf) { + next if $opt !~ m/^net\d+$/; + + next if !$conf->{$opt}; + my $d = PVE::LXC::Config->parse_lxc_network($conf->{$opt}); + next if !$d || !$d->{bridge}; + + my $target_bridge = PVE::JSONSchema::map_id($map, $d->{bridge}); + $bridges->{$target_bridge}->{$opt} = $d->{bridge}; + + next if $scan_only; + + $d->{bridge} = $target_bridge; + $conf->{$opt} = PVE::LXC::Config->print_lxc_network($d); + } + + return $bridges; +} + 1;