From f9dde219f23108d7c8991ddb51c6be44cc9438b9 Mon Sep 17 00:00:00 2001 From: Stefan Reiter Date: Mon, 4 Oct 2021 17:29:20 +0200 Subject: [PATCH] fix #3075: add TPM v1.2 and v2.0 support via swtpm Starts an instance of swtpm per VM in its systemd scope; it will terminate by itself if the VM exits, or be terminated manually if startup fails. Before first use, a TPM state is created via swtpm_setup. State is stored in a 'tpmstate0' volume, treated much the same way as an efidisk. It is migrated 'offline'; the important part here is the creation of the target volume, the actual data transfer happens via the QEMU device state migration process. Move-disk can only work offline, as the disk is not registered with QEMU, so 'drive-mirror' wouldn't work. swtpm itself has no method of moving a backing storage at runtime. For backups, a bit of a workaround is necessary (this may later be replaced by NBD support in swtpm): During the backup, we attach the backing file of the TPM as a read-only drive to QEMU, so our backup code can detect it as a block device and back it up as such, while ensuring consistency with the rest of disk state ("snapshot" semantic). The name for the ephemeral drive is specifically chosen as 'drive-tpmstate0-backup', diverging from our usual naming scheme with the '-backup' suffix, to avoid it ever being treated as a regular drive by the rest of the stack in case it gets left over after a backup for some reason (shouldn't happen). 
Signed-off-by: Stefan Reiter --- PVE/API2/Qemu.pm | 5 ++ PVE/QemuMigrate.pm | 14 +++- PVE/QemuServer.pm | 137 +++++++++++++++++++++++++++++++++++++-- PVE/QemuServer/Drive.pm | 63 ++++++++++++++---- PVE/VZDump/QemuServer.pm | 43 ++++++++++-- 5 files changed, 238 insertions(+), 24 deletions(-) diff --git a/PVE/API2/Qemu.pm b/PVE/API2/Qemu.pm index a8fbd9d..6228125 100644 --- a/PVE/API2/Qemu.pm +++ b/PVE/API2/Qemu.pm @@ -184,6 +184,11 @@ my $create_disks = sub { my $volid; if ($ds eq 'efidisk0') { ($volid, $size) = PVE::QemuServer::create_efidisk($storecfg, $storeid, $vmid, $fmt, $arch); + } elsif ($ds eq 'tpmstate0') { + # swtpm can only use raw volumes, and uses a fixed size + $size = PVE::Tools::convert_size(PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE, 'b' => 'kb'); + $volid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, + "raw", undef, $size); } else { $volid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $fmt, undef, $size); } diff --git a/PVE/QemuMigrate.pm b/PVE/QemuMigrate.pm index 4f5bfa4..ae3eaf1 100644 --- a/PVE/QemuMigrate.pm +++ b/PVE/QemuMigrate.pm @@ -488,6 +488,7 @@ sub scan_local_volumes { $local_volumes->{$volid}->{ref} = $attr->{referenced_in_config} ? 'config' : 'snapshot'; $local_volumes->{$volid}->{ref} = 'storage' if $attr->{is_unused}; + $local_volumes->{$volid}->{ref} = 'generated' if $attr->{is_tpmstate}; $local_volumes->{$volid}->{is_vmstate} = $attr->{is_vmstate} ? 
1 : 0; @@ -587,6 +588,9 @@ sub scan_local_volumes { $local_volumes->{$volid}->{migration_mode} = 'online'; } elsif ($self->{running} && $ref eq 'generated') { # offline migrate the cloud-init ISO and don't regenerate on VM start + # + # tpmstate will also be offline migrated first, and in case of + # live migration then updated by QEMU/swtpm if necessary $local_volumes->{$volid}->{migration_mode} = 'offline'; } else { $local_volumes->{$volid}->{migration_mode} = 'offline'; @@ -648,7 +652,9 @@ sub config_update_local_disksizes { PVE::QemuConfig->foreach_volume($conf, sub { my ($key, $drive) = @_; - return if $key eq 'efidisk0'; # skip efidisk, will be handled later + # skip special disks, will be handled later + return if $key eq 'efidisk0'; + return if $key eq 'tpmstate0'; my $volid = $drive->{file}; return if !defined($local_volumes->{$volid}); # only update sizes for local volumes @@ -665,6 +671,12 @@ sub config_update_local_disksizes { if (defined($conf->{efidisk0})) { PVE::QemuServer::update_efidisk_size($conf); } + + # TPM state might have an irregular filesize, to avoid problems on transfer + # we always assume the static size of 4M to allocate on the target + if (defined($conf->{tpmstate0})) { + PVE::QemuServer::update_tpmstate_size($conf); + } } sub filter_local_volumes { diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm index e8047e8..0ca5e00 100644 --- a/PVE/QemuServer.pm +++ b/PVE/QemuServer.pm @@ -1143,7 +1143,8 @@ PVE::JSONSchema::register_format('pve-qm-bootdev', \&verify_bootdev); sub verify_bootdev { my ($dev, $noerr) = @_; - return $dev if PVE::QemuServer::Drive::is_valid_drivename($dev) && $dev !~ m/^efidisk/; + my $special = $dev =~ m/^efidisk/ || $dev =~ m/^tpmstate/; + return $dev if PVE::QemuServer::Drive::is_valid_drivename($dev) && !$special; my $check = sub { my ($base) = @_; @@ -2966,6 +2967,90 @@ sub audio_devs { return $devs; } +sub get_tpm_paths { + my ($vmid) = @_; + return { + socket => "/var/run/qemu-server/$vmid.swtpm", + pid => 
"/var/run/qemu-server/$vmid.swtpm.pid", + }; +} + +sub add_tpm_device { + my ($vmid, $devices, $conf) = @_; + + return if !$conf->{tpmstate0}; + + my $paths = get_tpm_paths($vmid); + + push @$devices, "-chardev", "socket,id=tpmchar,path=$paths->{socket}"; + push @$devices, "-tpmdev", "emulator,id=tpmdev,chardev=tpmchar"; + push @$devices, "-device", "tpm-tis,tpmdev=tpmdev"; +} + +sub start_swtpm { + my ($storecfg, $vmid, $tpmdrive, $migration) = @_; + + return if !$tpmdrive; + + my $state; + my $tpm = parse_drive("tpmstate0", $tpmdrive); + my ($storeid, $volname) = PVE::Storage::parse_volume_id($tpm->{file}, 1); + if ($storeid) { + $state = PVE::Storage::map_volume($storecfg, $tpm->{file}); + } else { + $state = $tpm->{file}; + } + + my $paths = get_tpm_paths($vmid); + + # during migration, we will get state from remote + # + if (!$migration) { + # run swtpm_setup to create a new TPM state if it doesn't exist yet + my $setup_cmd = [ + "swtpm_setup", + "--tpmstate", + "file://$state", + "--createek", + "--create-ek-cert", + "--create-platform-cert", + "--lock-nvram", + "--config", + "/etc/swtpm_setup.conf", # do not use XDG configs + "--runas", + "0", # force creation as root, error if not possible + "--not-overwrite", # ignore existing state, do not modify + ]; + + push @$setup_cmd, "--tpm2" if $tpm->{version} eq 'v2.0'; + # TPM 2.0 supports ECC crypto, use if possible + push @$setup_cmd, "--ecc" if $tpm->{version} eq 'v2.0'; + + run_command($setup_cmd, outfunc => sub { + print "swtpm_setup: $1\n"; + }); + } + + my $emulator_cmd = [ + "swtpm", + "socket", + "--tpmstate", + "backend-uri=file://$state,mode=0600", + "--ctrl", + "type=unixio,path=$paths->{socket},mode=0600", + "--pid", + "file=$paths->{pid}", + "--terminate", # terminate on QEMU disconnect + "--daemon", + ]; + push @$emulator_cmd, "--tpm2" if $tpm->{version} eq 'v2.0'; + run_command($emulator_cmd, outfunc => sub { print $1; }); + + # return untainted PID of swtpm daemon so it can be killed on error + 
file_read_firstline($paths->{pid}) =~ m/(\d+)/; + return $1; +} + sub vga_conf_has_spice { my ($vga) = @_; @@ -3467,6 +3552,8 @@ sub config_to_command { push @$devices, @$audio_devs; } + add_tpm_device($vmid, $devices, $conf); + my $sockets = 1; $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer iused $sockets = $conf->{sockets} if $conf->{sockets}; @@ -3663,6 +3750,8 @@ sub config_to_command { # ignore efidisk here, already added in bios/fw handling code above return if $drive->{interface} eq 'efidisk'; + # similar for TPM + return if $drive->{interface} eq 'tpmstate'; $use_virtio = 1 if $ds =~ m/^virtio/; @@ -4524,6 +4613,9 @@ sub foreach_volid { $volhash->{$volid}->{is_vmstate} //= 0; $volhash->{$volid}->{is_vmstate} = 1 if $key eq 'vmstate'; + $volhash->{$volid}->{is_tpmstate} //= 0; + $volhash->{$volid}->{is_tpmstate} = 1 if $key eq 'tpmstate0'; + $volhash->{$volid}->{is_unused} //= 0; $volhash->{$volid}->{is_unused} = 1 if $key =~ /^unused\d+$/; @@ -4721,7 +4813,7 @@ sub vmconfig_hotplug_pending { vmconfig_update_net($storecfg, $conf, $hotplug_features->{network}, $vmid, $opt, $value, $arch, $machine_type); } elsif (is_valid_drivename($opt)) { - die "skip\n" if $opt eq 'efidisk0'; + die "skip\n" if $opt eq 'efidisk0' || $opt eq 'tpmstate0'; # some changes can be done without hotplug my $drive = parse_drive($opt, $value); if (drive_is_cloudinit($drive)) { @@ -5341,8 +5433,17 @@ sub vm_start_nolock { PVE::Tools::run_fork sub { PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", %properties); + my $tpmpid; + if (my $tpm = $conf->{tpmstate0}) { + # start the TPM emulator so QEMU can connect on start + $tpmpid = start_swtpm($storecfg, $vmid, $tpm, $migratedfrom); + } + my $exitcode = run_command($cmd, %run_params); - die "QEMU exited with code $exitcode\n" if $exitcode; + if ($exitcode) { + kill 'TERM', $tpmpid if $tpmpid; + die "QEMU exited with code $exitcode\n"; + } }; }; @@ -5542,6 +5643,14 @@ sub vm_stop_cleanup { if (!$keepActive) { 
my $vollist = get_vm_volumes($conf); PVE::Storage::deactivate_volumes($storecfg, $vollist); + + if (my $tpmdrive = $conf->{tpmstate0}) { + my $tpm = parse_drive("tpmstate0", $tpmdrive); + my ($storeid, $volname) = PVE::Storage::parse_volume_id($tpm->{file}, 1); + if ($storeid) { + PVE::Storage::unmap_volume($storecfg, $tpm->{file}); + } + } } foreach my $ext (qw(mon qmp pid vnc qga)) { @@ -6079,7 +6188,7 @@ sub restore_update_config_line { $net->{macaddr} = PVE::Tools::random_ether_addr($dc->{mac_prefix}) if $net->{macaddr}; $netstr = print_net($net); $res .= "$id: $netstr\n"; - } elsif ($line =~ m/^((ide|scsi|virtio|sata|efidisk)\d+):\s*(\S+)\s*$/) { + } elsif ($line =~ m/^((ide|scsi|virtio|sata|efidisk|tpmstate)\d+):\s*(\S+)\s*$/) { my $virtdev = $1; my $value = $3; my $di = parse_drive($virtdev, $value); @@ -6397,8 +6506,8 @@ sub restore_proxmox_backup_archive { my $volid = $d->{volid}; my $path = PVE::Storage::path($storecfg, $volid); - # for live-restore we only want to preload the efidisk - next if $options->{live} && $virtdev ne 'efidisk0'; + # for live-restore we only want to preload the efidisk and TPM state + next if $options->{live} && $virtdev ne 'efidisk0' && $virtdev ne 'tpmstate0'; my $pbs_restore_cmd = [ '/usr/bin/pbs-restore', @@ -6473,7 +6582,9 @@ sub restore_proxmox_backup_archive { my $conf = PVE::QemuConfig->load_config($vmid); die "cannot do live-restore for template\n" if PVE::QemuConfig->is_template($conf); - delete $devinfo->{'drive-efidisk0'}; # this special drive is already restored before start + # these special drives are already restored before start + delete $devinfo->{'drive-efidisk0'}; + delete $devinfo->{'drive-tpmstate0-backup'}; pbs_live_restore($vmid, $conf, $storecfg, $devinfo, $repo, $keyfile, $pbs_backup_name); PVE::QemuConfig->remove_lock($vmid, "create"); @@ -7307,6 +7418,8 @@ sub clone_disk { $size = PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE; } elsif ($drivename eq 'efidisk0') { $size = get_efivars_size($conf); + } 
elsif ($drivename eq 'tpmstate0') { + $size = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE; } else { ($size) = PVE::Storage::volume_size_info($storecfg, $drive->{file}, 10); } @@ -7347,6 +7460,8 @@ sub clone_disk { } } else { + die "cannot move TPM state while VM is running\n" if $drivename eq 'tpmstate0'; + my $kvmver = get_running_qemu_version ($vmid); if (!min_version($kvmver, 2, 7)) { die "drive-mirror with iothread requires qemu version 2.7 or higher\n" @@ -7417,6 +7532,14 @@ sub update_efidisk_size { return; } +sub update_tpmstate_size { + my ($conf) = @_; + + my $disk = PVE::QemuServer::parse_drive('tpmstate0', $conf->{tpmstate0}); + $disk->{size} = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE; + $conf->{tpmstate0} = print_drive($disk); +} + sub create_efidisk($$$$$) { my ($storecfg, $storeid, $vmid, $fmt, $arch) = @_; diff --git a/PVE/QemuServer/Drive.pm b/PVE/QemuServer/Drive.pm index 5110190..32c7377 100644 --- a/PVE/QemuServer/Drive.pm +++ b/PVE/QemuServer/Drive.pm @@ -306,16 +306,6 @@ my $virtiodesc = { }; PVE::JSONSchema::register_standard_option("pve-qm-virtio", $virtiodesc); -my $alldrive_fmt = { - %drivedesc_base, - %iothread_fmt, - %model_fmt, - %queues_fmt, - %scsiblock_fmt, - %ssd_fmt, - %wwn_fmt, -}; - my $efidisk_fmt = { volume => { alias => 'file' }, file => { @@ -345,6 +335,55 @@ my $efidisk_desc = { PVE::JSONSchema::register_standard_option("pve-qm-efidisk", $efidisk_desc); +my %tpmversion_fmt = ( + version => { + type => 'string', + enum => [qw(v1.2 v2.0)], + description => "The TPM interface version. v2.0 is newer and should be " + . "preferred. 
Note that this cannot be changed later on.", + optional => 1, + default => 'v2.0', + }, +); +my $tpmstate_fmt = { + volume => { alias => 'file' }, + file => { + type => 'string', + format => 'pve-volume-id-or-qm-path', + default_key => 1, + format_description => 'volume', + description => "The drive's backing volume.", + }, + size => { + type => 'string', + format => 'disk-size', + format_description => 'DiskSize', + description => "Disk size. This is purely informational and has no effect.", + optional => 1, + }, + %tpmversion_fmt, +}; +my $tpmstate_desc = { + optional => 1, + type => 'string', format => $tpmstate_fmt, + description => "Configure a Disk for storing TPM state. " . + $ALLOCATION_SYNTAX_DESC . " Note that SIZE_IN_GiB is ignored here " . + "and that the default size of 4 MiB will always be used instead. The " . + "format is also fixed to 'raw'.", +}; +use constant TPMSTATE_DISK_SIZE => 4 * 1024 * 1024; + +my $alldrive_fmt = { + %drivedesc_base, + %iothread_fmt, + %model_fmt, + %queues_fmt, + %scsiblock_fmt, + %ssd_fmt, + %wwn_fmt, + %tpmversion_fmt, +}; + my $unused_fmt = { volume => { alias => 'file' }, file => { @@ -379,6 +418,7 @@ for (my $i = 0; $i < $MAX_VIRTIO_DISKS; $i++) { } $drivedesc_hash->{efidisk0} = $efidisk_desc; +$drivedesc_hash->{tpmstate0} = $tpmstate_desc; for (my $i = 0; $i < $MAX_UNUSED_DISKS; $i++) { $drivedesc_hash->{"unused$i"} = $unuseddesc; @@ -390,7 +430,8 @@ sub valid_drive_names { (map { "scsi$_" } (0 .. ($MAX_SCSI_DISKS - 1))), (map { "virtio$_" } (0 .. ($MAX_VIRTIO_DISKS - 1))), (map { "sata$_" } (0 .. 
($MAX_SATA_DISKS - 1))), - 'efidisk0'); + 'efidisk0', + 'tpmstate0'); } sub is_valid_drivename { diff --git a/PVE/VZDump/QemuServer.pm b/PVE/VZDump/QemuServer.pm index 44b705f..b133694 100644 --- a/PVE/VZDump/QemuServer.pm +++ b/PVE/VZDump/QemuServer.pm @@ -86,11 +86,10 @@ sub prepare { if (!$volume->{included}) { $self->loginfo("exclude disk '$name' '$volid' ($volume->{reason})"); next; - } elsif ($self->{vm_was_running} && $volume_config->{iothread}) { - if (!PVE::QemuServer::Machine::runs_at_least_qemu_version($vmid, 4, 0, 1)) { - die "disk '$name' '$volid' (iothread=on) can't use backup feature with running QEMU " . - "version < 4.0.1! Either set backup=no for this drive or upgrade QEMU and restart VM\n"; - } + } elsif ($self->{vm_was_running} && $volume_config->{iothread} && + !PVE::QemuServer::Machine::runs_at_least_qemu_version($vmid, 4, 0, 1)) { + die "disk '$name' '$volid' (iothread=on) can't use backup feature with running QEMU " . + "version < 4.0.1! Either set backup=no for this drive or upgrade QEMU and restart VM\n"; } else { my $log = "include disk '$name' '$volid'"; if (defined(my $size = $volume_config->{size})) { @@ -131,6 +130,12 @@ sub prepare { qmdevice => "drive-$ds", }; + if ($ds eq 'tpmstate0') { + # TPM drive only exists for backup, which is reflected in the name + $diskinfo->{qmdevice} = 'drive-tpmstate0-backup'; + $task->{tpmpath} = $path; + } + if (-b $path) { $diskinfo->{type} = 'block'; } else { @@ -425,6 +430,28 @@ my $query_backup_status_loop = sub { }; }; +my $attach_tpmstate_drive = sub { + my ($self, $task, $vmid) = @_; + + return if !$task->{tpmpath}; + + # unconditionally try to remove the tpmstate-named drive - it only exists + # for backing up, and avoids errors if left over from some previous event + eval { PVE::QemuServer::qemu_drivedel($vmid, "tpmstate0-backup"); }; + + $self->loginfo('attaching TPM drive to QEMU for backup'); + + my $drive = "file=$task->{tpmpath},if=none,read-only=on,id=drive-tpmstate0-backup"; + my $ret 
= PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_add auto \"$drive\""); + die "attaching TPM drive failed\n" if $ret !~ m/OK/s; +}; + +my $detach_tpmstate_drive = sub { + my ($task, $vmid) = @_; + return if !$task->{tpmpath} || !PVE::QemuServer::check_running($vmid); + eval { PVE::QemuServer::qemu_drivedel($vmid, "tpmstate0-backup"); }; +}; + sub archive_pbs { my ($self, $task, $vmid) = @_; @@ -501,6 +528,8 @@ sub archive_pbs { $master_keyfile = undef; # skip rest of master key handling below } + $attach_tpmstate_drive->($self, $task, $vmid); + my $fs_frozen = $self->qga_fs_freeze($task, $vmid); my $params = { @@ -673,6 +702,8 @@ sub archive_vma { die "interrupted by signal\n"; }; + $attach_tpmstate_drive->($self, $task, $vmid); + my $outfh; if ($opts->{stdout}) { $outfh = $opts->{stdout}; @@ -876,6 +907,8 @@ sub snapshot { sub cleanup { my ($self, $task, $vmid) = @_; + $detach_tpmstate_drive->($task, $vmid); + if ($self->{qmeventd_fh}) { close($self->{qmeventd_fh}); } -- 2.39.2