#!/usr/bin/perl

use strict;
use warnings;

use POSIX ();
use Time::HiRes qw (gettimeofday);
use Filesys::Df;

use PVE::SafeSyslog;
use PVE::Daemon;
use PVE::Tools qw(dir_glob_foreach file_read_firstline);
use PVE::ProcFSTools;
use PVE::INotify;
use PVE::Cluster qw(cfs_read_file);
use PVE::Storage;
use PVE::QemuServer;
use PVE::LXC;
use PVE::RPCEnvironment;
use PVE::API2::Subscription;
use PVE::AutoBalloon;

use PVE::Status::Plugin;
use PVE::Status::Graphite;

PVE::Status::Graphite->register();
PVE::Status::Plugin->init();

use base qw(PVE::Daemon);

my $opt_debug;

my $cmdline = [$0, @ARGV];

my %daemon_options = (restart_on_error => 5, stop_wait_time => 5);

my $daemon = __PACKAGE__->new('pvestatd', $cmdline, %daemon_options);

my $nodename = PVE::INotify::nodename();

# set by hup(), polled by the main loop in run()
my $restart_request;

# Callback handed to PVE::CLIHandler::handle_cmd: sets up a 'cli' RPC
# environment running as root@pam and clears any pending restart request.
sub prepare {
    my $rpcenv = PVE::RPCEnvironment->init('cli');

    $rpcenv->init_request();
    $rpcenv->set_language($ENV{LANG});
    $rpcenv->set_user('root@pam');

    $restart_request = 0;
}

# Daemon hook: remember the debug flag and refresh the cluster file system
# state once before the main loop starts.
sub init {
    my ($self) = @_;

    $opt_debug = $self->{debug};

    PVE::Cluster::cfs_update();
}

# Daemon hook: log the shutdown, reap children and exit with status 0.
sub shutdown {
    my ($self) = @_;

    syslog('info' , "server closing");

    # wait for children
    # (WNOHANG: only collects children that have already exited)
    1 while (waitpid(-1, POSIX::WNOHANG()) > 0);

    $self->exit_daemon(0);
}

# Daemon hook: only record the request; run() performs the actual restart
# the next time it checks the flag.
sub hup {
    my ($self) = @_;

    $restart_request = 1;
}

# Call $code->($plugin, $plugin_config) for every entry in
# $status_cfg->{ids} that is not marked 'disable'. $plugin is whatever
# PVE::Status::Plugin->lookup() returns for the entry's type.
sub _foreach_status_plugin {
    my ($status_cfg, $code) = @_;

    foreach my $id (keys %{$status_cfg->{ids}}) {
        my $plugin_config = $status_cfg->{ids}->{$id};
        next if $plugin_config->{disable};

        my $plugin = PVE::Status::Plugin->lookup($plugin_config->{type});
        $code->($plugin, $plugin_config);
    }
}

# Collect load, CPU, memory, swap, root file system and network counters of
# this node, broadcast them as one colon separated record under
# "pve2-node/$nodename" and notify the enabled status plugins.
sub update_node_status {
    my ($status_cfg) = @_;

    my ($avg1, $avg5, $avg15) = PVE::ProcFSTools::read_loadavg();

    my $stat = PVE::ProcFSTools::read_proc_stat();

    my $netdev = PVE::ProcFSTools::read_proc_net_dev();

    my ($uptime) = PVE::ProcFSTools::read_proc_uptime();

    my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();

    my $maxcpu = $cpuinfo->{cpus};

    my $subinfo = PVE::INotify::read_file('subscription');
    my $sublevel = $subinfo->{level} || '';

    # traffic from/to physical interface cards
    # (only devices named ethN are counted)
    my $netin = 0;
    my $netout = 0;
    foreach my $dev (keys %$netdev) {
        next if $dev !~ m/^eth\d+$/;
        $netin += $netdev->{$dev}->{receive};
        $netout += $netdev->{$dev}->{transmit};
    }

    my $meminfo = PVE::ProcFSTools::read_meminfo();

    my $dinfo = df('/', 1); # output is bytes

    my $ctime = time();

    # everything not free is considered to be used
    my $dused = $dinfo->{blocks} - $dinfo->{bfree};

    my $data = "$uptime:$sublevel:$ctime:$avg1:$maxcpu:$stat->{cpu}:$stat->{wait}:" .
        "$meminfo->{memtotal}:$meminfo->{memused}:" .
        "$meminfo->{swaptotal}:$meminfo->{swapused}:" .
        "$dinfo->{blocks}:$dused:$netin:$netout";

    PVE::Cluster::broadcast_rrd("pve2-node/$nodename", $data);

    _foreach_status_plugin($status_cfg, sub {
        my ($plugin, $plugin_config) = @_;

        my $d = {}; # fixme: what data?
        $plugin->update_node_status($plugin_config, $nodename, $d);
    });
}

# Ask PVE::AutoBalloon::compute_alg1 for new balloon targets (aiming at
# about 80% host memory usage, at most 100 MiB change per call) and send a
# 'balloon' monitor command to every VM whose target differs from its
# current balloon value. Failures for a single VM are only warned about.
sub auto_balloning {
    my ($vmstatus) =  @_;

    # debug output, only printed when running with the debug flag set
    my $log = sub {
        return if !$opt_debug;
        print @_;
    };

    my $hostmeminfo = PVE::ProcFSTools::read_meminfo();

    # to debug, run 'pvestatd -d' and set memtotal here
    #$hostmeminfo->{memtotal} = int(2*1024*1024*1024/0.8); # you can set this to test

    my $hostfreemem = $hostmeminfo->{memtotal} - $hostmeminfo->{memused};

    # we try to use about 80% host memory
    # goal: we want to change memory usage by this amount (positive or negative)
    my $goal = int($hostmeminfo->{memtotal}*0.8 - $hostmeminfo->{memused});

    my $maxchange = 100*1024*1024;
    my $res = PVE::AutoBalloon::compute_alg1($vmstatus, $goal, $maxchange);

    $log->("host goal: $goal free: $hostfreemem total: $hostmeminfo->{memtotal}\n");

    foreach my $vmid (keys %$vmstatus) {
        next if !$res->{$vmid};

        my $d = $vmstatus->{$vmid};
        my $diff = int($res->{$vmid} - $d->{balloon});

        # nothing to do if the target equals the current balloon value
        next if $diff == 0;

        $log->("BALLOON $vmid to $res->{$vmid} ($diff)\n");
        eval {
            PVE::QemuServer::vm_mon_cmd($vmid, "balloon",
                                        value => int($res->{$vmid}));
        };
        warn $@ if $@;
    }
}

# Query the status of all QEMU VMs, run auto ballooning (errors are logged,
# not propagated), broadcast one record per VM under "pve2.3-vm/$vmid" and
# notify the enabled status plugins.
sub update_qemu_status {
    my ($status_cfg) = @_;

    my $ctime = time();

    my $vmstatus = PVE::QemuServer::vmstatus(undef, 1);

    eval { auto_balloning($vmstatus); };
    syslog('err', "auto ballooning error: $@") if $@;

    foreach my $vmid (keys %$vmstatus) {
        my $d = $vmstatus->{$vmid};
        my $data;
        my $status = $d->{qmpstatus} || $d->{status} || 'stopped';
        my $template = $d->{template} ? $d->{template} : "0";
        if ($d->{pid}) { # running
            $data = "$d->{uptime}:$d->{name}:$status:$template:" .
                "$ctime:$d->{cpus}:$d->{cpu}:" .
                "$d->{maxmem}:$d->{mem}:" .
                "$d->{maxdisk}:$d->{disk}:" .
                "$d->{netin}:$d->{netout}:" .
                "$d->{diskread}:$d->{diskwrite}";
        } else {
            # same field layout as above, runtime counters left empty
            $data = "0:$d->{name}:$status:$template:$ctime:$d->{cpus}::" .
                "$d->{maxmem}::" .
                "$d->{maxdisk}:$d->{disk}:" .
                ":::";
        }
        PVE::Cluster::broadcast_rrd("pve2.3-vm/$vmid", $data);

        _foreach_status_plugin($status_cfg, sub {
            my ($plugin, $plugin_config) = @_;
            $plugin->update_qemu_status($plugin_config, $vmid, $d);
        });
    }
}

# Send SIGKILL to lxc-console processes (as reported by
# PVE::LXC::find_lxc_console_pids) whose container ID is no longer present
# in PVE::LXC::vmstatus().
sub remove_stale_lxc_consoles {

    my $vmstatus = PVE::LXC::vmstatus();
    my $pidhash = PVE::LXC::find_lxc_console_pids();

    foreach my $vmid (keys %$pidhash) {
        next if defined($vmstatus->{$vmid});
        syslog('info', "remove stale lxc-console for CT $vmid");
        foreach my $pid (@{$pidhash->{$vmid}}) {
            kill(9, $pid);
        }
    }
}

# Query the status of all LXC containers, broadcast one record per
# container under "pve2.3-vm/$vmid" and notify the enabled status plugins.
sub update_lxc_status {
    my ($status_cfg) = @_;

    my $ctime = time();

    my $vmstatus = PVE::LXC::vmstatus();

    foreach my $vmid (keys %$vmstatus) {
        my $d = $vmstatus->{$vmid};
        my $data;
        if ($d->{status} eq 'running') { # running
            $data = "$d->{uptime}:$d->{name}:$d->{status}:0:$ctime:$d->{cpus}:$d->{cpu}:" .
                "$d->{maxmem}:$d->{mem}:" .
                "$d->{maxdisk}:$d->{disk}:" .
                "$d->{netin}:$d->{netout}:" .
                "$d->{diskread}:$d->{diskwrite}";
        } else {
            # same field layout as above, runtime counters left empty
            $data = "0:$d->{name}:$d->{status}:0:$ctime:$d->{cpus}::" .
                "$d->{maxmem}::" .
                "$d->{maxdisk}:$d->{disk}:" .
                ":::";
        }
        PVE::Cluster::broadcast_rrd("pve2.3-vm/$vmid", $data);

        _foreach_status_plugin($status_cfg, sub {
            my ($plugin, $plugin_config) = @_;
            $plugin->update_lxc_status($plugin_config, $vmid, $d);
        });
    }
}

# Broadcast total/used space of every active storage under
# "pve2-storage/$nodename/$storeid" and notify the enabled status plugins.
sub update_storage_status {
    my ($status_cfg) = @_;

    my $cfg = cfs_read_file("storage.cfg");

    my $ctime = time();

    my $info = PVE::Storage::storage_info($cfg);

    foreach my $storeid (keys %$info) {
        my $d = $info->{$storeid};
        next if !$d->{active};

        # everything not free is considered to be used
        my $realused = $d->{total} - $d->{avail};

        my $data = "$ctime:$d->{total}:$realused";

        my $key = "pve2-storage/${nodename}/$storeid";
        PVE::Cluster::broadcast_rrd($key, $data);

        _foreach_status_plugin($status_cfg, sub {
            my ($plugin, $plugin_config) = @_;
            $plugin->update_storage_status($plugin_config, $storeid, $d);
        });
    }
}

# Run one complete status update. Every step is wrapped in its own eval so
# that a failure in one step is logged and does not prevent the others.
sub update_status {

    # update worker list. This is not really required and
    # we just call this to make sure that we have a correct
    # list in case of an unexpected crash.
    eval {
        my $tlist = PVE::RPCEnvironment::active_workers();
        PVE::Cluster::broadcast_tasklist($tlist);
    };
    my $err = $@;
    syslog('err', $err) if $err;

    my $status_cfg = PVE::Cluster::cfs_read_file('status.cfg');

    eval {
        update_node_status($status_cfg);
    };
    $err = $@;
    syslog('err', "node status update error: $err") if $err;

    eval {
        update_qemu_status($status_cfg);
    };
    $err = $@;
    syslog('err', "qemu status update error: $err") if $err;

    eval {
        update_lxc_status($status_cfg);
    };
    $err = $@;
    syslog('err', "lxc status update error: $err") if $err;

    eval {
        update_storage_status($status_cfg);
    };
    $err = $@;
    syslog('err', "storage status update error: $err") if $err;

    eval {
        remove_stale_lxc_consoles();
    };
    $err = $@;
    syslog('err', "lxc console cleanup error: $err") if $err;
}

my $next_update = 0;

# do not update directly after startup, because install scripts
# have a problem with that
my $cycle = 0;

# seconds between two status updates
my $updatetime = 10;

# resident memory baseline used by run() to detect memory growth
my $initial_memory_usage;

# Daemon main loop: every $updatetime seconds refresh the cluster state and
# run update_status(); restart the daemon when a HUP was received or when
# resident memory grew too much.
sub run {
    my ($self) = @_;

    for (;;) { # forever

        $next_update = time() + $updatetime;

        # the very first iteration ($cycle == 0) skips the update
        if ($cycle) {
            my ($ccsec, $cusec) = gettimeofday ();
            eval {
                # syslog('info', "start status update");
                PVE::Cluster::cfs_update();
                update_status();
            };
            my $err = $@;

            if ($err) {
                syslog('err', "status update error: $err");
            }

            my ($ccsec_end, $cusec_end) = gettimeofday ();
            my $cptime = ($ccsec_end-$ccsec) + ($cusec_end - $cusec)/1000000;

            # only report updates that took unusually long
            syslog('info', sprintf("status update time (%.3f seconds)", $cptime))
                if ($cptime > 5);
        }

        $cycle++;

        my $mem = PVE::ProcFSTools::read_memory_usage();

        if (!defined($initial_memory_usage) || ($cycle < 10)) {
            # keep refreshing the baseline during the first cycles
            $initial_memory_usage = $mem->{resident};
        } else {
            my $diff = $mem->{resident} - $initial_memory_usage;
            # restart once resident memory grew by more than 5 MiB
            if ($diff > 5*1024*1024) {
                syslog ('info', "restarting server after $cycle cycles to " .
                        "reduce memory usage (free $mem->{resident} ($diff) bytes)");
                $self->restart_daemon();
            }
        }

        # sleep in one second steps so a restart request is noticed quickly
        my $wcount = 0;
        while ((time() < $next_update) &&
               ($wcount < $updatetime) && # protect against time wrap
               !$restart_request) {
            $wcount++;
            sleep (1);
        };

        $self->restart_daemon() if $restart_request;
    }
}

$daemon->register_start_command();
$daemon->register_restart_command(1);
$daemon->register_stop_command();
$daemon->register_status_command();

my $cmddef = {
    start => [ __PACKAGE__, 'start', []],
    restart => [ __PACKAGE__, 'restart', []],
    stop => [ __PACKAGE__, 'stop', []],
    status => [ __PACKAGE__, 'status', [], undef, sub { print shift . "\n";} ],
};

my $cmd = shift;

PVE::CLIHandler::handle_cmd($cmddef, $0, $cmd, \@ARGV, undef, $0, \&prepare);

exit (0);

__END__

=head1 NAME

pvestatd - PVE Status Daemon

=head1 SYNOPSIS

=include synopsis

=head1 DESCRIPTION

This daemon queries the status of VMs, storages and containers at
regular intervals. The result is sent to all nodes in the cluster.

=include pve_copyright