package PVE::Service::pvestatd;

use strict;
use warnings;

use PVE::SafeSyslog;
use PVE::Daemon;

use POSIX (); # POSIX::WNOHANG(), used in shutdown()
use Time::HiRes qw(gettimeofday);
use PVE::Tools qw(dir_glob_foreach file_read_firstline);
use PVE::ProcFSTools;
use PVE::CpuSet;
use Filesys::Df;
use PVE::INotify;
use PVE::Cluster qw(cfs_read_file);
use PVE::Storage;
use PVE::QemuServer;
use PVE::LXC;
use PVE::LXC::Config;
use PVE::RPCEnvironment;
use PVE::API2::Subscription;
use PVE::AutoBalloon;

use PVE::Status::Plugin;
use PVE::Status::Graphite;
use PVE::Status::InfluxDB;

PVE::Status::Graphite->register();
PVE::Status::InfluxDB->register();
PVE::Status::Plugin->init();

use base qw(PVE::Daemon);

my $opt_debug;
my $restart_request;

my $nodename = PVE::INotify::nodename();

my $cmdline = [$0, @ARGV];

my %daemon_options = (restart_on_error => 5, stop_wait_time => 5);
my $daemon = __PACKAGE__->new('pvestatd', $cmdline, %daemon_options);

sub init {
    my ($self) = @_;

    $opt_debug = $self->{debug};

    PVE::Cluster::cfs_update();
}

sub shutdown {
    my ($self) = @_;

    syslog('info', "server closing");

    # wait for children
    1 while (waitpid(-1, POSIX::WNOHANG()) > 0);

    $self->exit_daemon(0);
}

sub hup {
    my ($self) = @_;

    $restart_request = 1;
}

my $generate_rrd_string = sub {
    my ($data) = @_;

    return join(':', map { $_ // 'U' } @$data);
};
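
# Example (illustrative, not from the source): $generate_rrd_string->([100, undef, 3])
# returns "100:U:3" -- undefined fields are encoded as 'U', RRDtool's "unknown" marker.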

sub update_node_status {
    my ($status_cfg) = @_;

    my ($avg1, $avg5, $avg15) = PVE::ProcFSTools::read_loadavg();

    my $stat = PVE::ProcFSTools::read_proc_stat();

    my $netdev = PVE::ProcFSTools::read_proc_net_dev();

    my ($uptime) = PVE::ProcFSTools::read_proc_uptime();

    my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();

    my $maxcpu = $cpuinfo->{cpus};

    my $subinfo = PVE::INotify::read_file('subscription');
    my $sublevel = $subinfo->{level} || '';

    # traffic from/to physical interface cards
    my $netin = 0;
    my $netout = 0;
    foreach my $dev (keys %$netdev) {
        next if $dev !~ m/^eth\d+$/;
        $netin += $netdev->{$dev}->{receive};
        $netout += $netdev->{$dev}->{transmit};
    }
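    # Note: only NICs matching ethX are summed here; other devices reported by
    # /proc/net/dev (bridges such as vmbr0, bonds, VLAN interfaces) are skipped
    # by the regex above.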

    my $meminfo = PVE::ProcFSTools::read_meminfo();

    my $dinfo = df('/', 1); # output is bytes

    my $ctime = time();

    # everything not free is considered to be used
    my $dused = $dinfo->{blocks} - $dinfo->{bfree};

    my $data = $generate_rrd_string->(
        [$uptime, $sublevel, $ctime, $avg1, $maxcpu, $stat->{cpu}, $stat->{wait},
         $meminfo->{memtotal}, $meminfo->{memused},
         $meminfo->{swaptotal}, $meminfo->{swapused},
         $dinfo->{blocks}, $dused, $netin, $netout]);

    PVE::Cluster::broadcast_rrd("pve2-node/$nodename", $data);

    foreach my $id (keys %{$status_cfg->{ids}}) {
        my $plugin_config = $status_cfg->{ids}->{$id};
        next if $plugin_config->{disable};
        my $plugin = PVE::Status::Plugin->lookup($plugin_config->{type});

        my $d = {};
        $d->{uptime} = $uptime;
        $d->{cpustat} = $stat;
        $d->{cpustat}->{avg1} = $avg1;
        $d->{cpustat}->{avg5} = $avg5;
        $d->{cpustat}->{avg15} = $avg15;
        $d->{cpustat}->{cpus} = $maxcpu;
        $d->{memory} = $meminfo;
        $d->{blockstat} = $dinfo;
        $d->{nics} = $netdev;

        $plugin->update_node_status($plugin_config, $nodename, $d, $ctime);
    }
}

sub auto_ballooning {
    my ($vmstatus) = @_;

    my $log = sub {
        return if !$opt_debug;
        print @_;
    };

    my $hostmeminfo = PVE::ProcFSTools::read_meminfo();

    # to debug, run 'pvestatd -d' and set memtotal here
    #$hostmeminfo->{memtotal} = int(2*1024*1024*1024/0.8); # you can set this to test

    my $hostfreemem = $hostmeminfo->{memtotal} - $hostmeminfo->{memused};

    # we try to use about 80% of the host memory;
    # goal: the amount by which we want to change memory usage (positive or negative)
    my $goal = int($hostmeminfo->{memtotal}*0.8 - $hostmeminfo->{memused});
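    # Illustrative numbers (not from the source): with memtotal = 64 GiB and
    # memused = 45 GiB, the target is 0.8 * 64 = 51.2 GiB, so $goal is roughly
    # +6.2 GiB that compute_alg1() below may distribute to the ballooning
    # guests; a negative $goal asks them to give memory back instead.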

    my $maxchange = 100*1024*1024;
    my $res = PVE::AutoBalloon::compute_alg1($vmstatus, $goal, $maxchange);

    &$log("host goal: $goal free: $hostfreemem total: $hostmeminfo->{memtotal}\n");

    foreach my $vmid (keys %$vmstatus) {
        next if !$res->{$vmid};
        my $d = $vmstatus->{$vmid};
        my $diff = int($res->{$vmid} - $d->{balloon});
        my $absdiff = $diff < 0 ? -$diff : $diff;
        if ($absdiff > 0) {
            &$log("BALLOON $vmid to $res->{$vmid} ($diff)\n");
            eval {
                PVE::QemuServer::vm_mon_cmd($vmid, "balloon",
                                            value => int($res->{$vmid}));
            };
            warn $@ if $@;
        }
    }
}

sub update_qemu_status {
    my ($status_cfg) = @_;

    my $ctime = time();

    my $vmstatus = PVE::QemuServer::vmstatus(undef, 1);

    eval { auto_ballooning($vmstatus); };
    syslog('err', "auto ballooning error: $@") if $@;

    foreach my $vmid (keys %$vmstatus) {
        my $d = $vmstatus->{$vmid};
        my $data;
        my $status = $d->{qmpstatus} || $d->{status} || 'stopped';
        my $template = $d->{template} ? $d->{template} : "0";
        if ($d->{pid}) { # running
            $data = $generate_rrd_string->(
                [$d->{uptime}, $d->{name}, $status, $template, $ctime, $d->{cpus}, $d->{cpu},
                 $d->{maxmem}, $d->{mem}, $d->{maxdisk}, $d->{disk},
                 $d->{netin}, $d->{netout}, $d->{diskread}, $d->{diskwrite}]);
        } else {
            $data = $generate_rrd_string->(
                [0, $d->{name}, $status, $template, $ctime, $d->{cpus}, undef,
                 $d->{maxmem}, undef, $d->{maxdisk}, $d->{disk}, undef, undef, undef, undef]);
        }
        PVE::Cluster::broadcast_rrd("pve2.3-vm/$vmid", $data);

        foreach my $id (keys %{$status_cfg->{ids}}) {
            my $plugin_config = $status_cfg->{ids}->{$id};
            next if $plugin_config->{disable};
            my $plugin = PVE::Status::Plugin->lookup($plugin_config->{type});
            $plugin->update_qemu_status($plugin_config, $vmid, $d, $ctime, $nodename);
        }
    }
}

sub remove_stale_lxc_consoles {

    my $vmstatus = PVE::LXC::vmstatus();
    my $pidhash = PVE::LXC::find_lxc_console_pids();

    foreach my $vmid (keys %$pidhash) {
        next if defined($vmstatus->{$vmid});
        syslog('info', "remove stale lxc-console for CT $vmid");
        foreach my $pid (@{$pidhash->{$vmid}}) {
            kill(9, $pid);
        }
    }
}

my $rebalance_error_count = {};

sub rebalance_lxc_containers {

    return if !-d '/sys/fs/cgroup/cpuset/lxc'; # nothing to do...
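    # Note: this checks the cgroup v1 cpuset hierarchy; if that path is not
    # present the rebalancer is simply a no-op.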

    my $all_cpus = PVE::CpuSet->new_from_cgroup('lxc', 'effective_cpus');
    my @allowed_cpus = $all_cpus->members();
    my $cpucount = scalar(@allowed_cpus);
    my $max_cpuid = PVE::CpuSet::max_cpuid();

    my @cpu_ctcount = (0) x $max_cpuid;
    my @balanced_cts;

    my $modify_cpuset = sub {
        my ($vmid, $cpuset, $newset) = @_;

        if (!$rebalance_error_count->{$vmid}) {
            syslog('info', "modified cpu set for lxc/$vmid: " .
                   $newset->short_string());
        }

        eval {

            if (-d "/sys/fs/cgroup/cpuset/lxc/$vmid/ns") {
                # allow all, so that we can set new cpuset in /ns
                $all_cpus->write_to_cgroup("lxc/$vmid");
                eval {
                    $newset->write_to_cgroup("lxc/$vmid/ns");
                };
                if (my $err = $@) {
                    warn $err if !$rebalance_error_count->{$vmid}++;
                    # restore original
                    $cpuset->write_to_cgroup("lxc/$vmid");
                } else {
                    # also apply to container root cgroup
                    $newset->write_to_cgroup("lxc/$vmid");
                    $rebalance_error_count->{$vmid} = 0;
                }
            } else {
                # old style container
                $newset->write_to_cgroup("lxc/$vmid");
                $rebalance_error_count->{$vmid} = 0;
            }
        };
        if (my $err = $@) {
            warn $err if !$rebalance_error_count->{$vmid}++;
        }
    };

    my $ctlist = PVE::LXC::config_list();

    foreach my $vmid (sort keys %$ctlist) {
        next if ! -d "/sys/fs/cgroup/cpuset/lxc/$vmid";

        my ($conf, $cpuset);
        eval {

            $conf = PVE::LXC::Config->load_config($vmid);

            $cpuset = PVE::CpuSet->new_from_cgroup("lxc/$vmid");
        };
        if (my $err = $@) {
            warn $err;
            next;
        }

        my @cpuset_members = $cpuset->members();

        if (!PVE::LXC::Config->has_lxc_entry($conf, 'lxc.cgroup.cpuset.cpus')) {

            my $cores = $conf->{cores} || $cpucount;
            $cores = $cpucount if $cores > $cpucount;

            # see if the number of cores was hot-reduced or
            # hasn't been enacted at all yet
            my $newset = PVE::CpuSet->new();
            if ($cores < scalar(@cpuset_members)) {
                for (my $i = 0; $i < $cores; $i++) {
                    $newset->insert($cpuset_members[$i]);
                }
            } elsif ($cores > scalar(@cpuset_members)) {
                my $count = $newset->insert(@cpuset_members);
                foreach my $cpu (@allowed_cpus) {
                    $count += $newset->insert($cpu);
                    last if $count >= $cores;
                }
            } else {
                $newset->insert(@cpuset_members);
            }

            # Apply hot-plugged changes if any:
            if (!$newset->is_equal($cpuset)) {
                @cpuset_members = $newset->members();
                $modify_cpuset->($vmid, $cpuset, $newset);
            }

            # Note: no need to rebalance if we already use all cores
            push @balanced_cts, [$vmid, $cores, $newset]
                if defined($conf->{cores}) && ($cores != $cpucount);
        }

        foreach my $cpu (@cpuset_members) {
            $cpu_ctcount[$cpu]++ if $cpu <= $max_cpuid;
        }
    }

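    # $find_best_cpu picks, for a container currently pinned to $cpu, the least
    # loaded CPU from $cpulist (load = number of containers per CPU, tracked in
    # @cpu_ctcount). A move only happens if the candidate is at least two
    # containers cheaper, which avoids ping-ponging between CPUs of nearly
    # equal load. Illustrative example (made-up numbers): with 4 containers on
    # CPU 3 and 2 on CPU 7, a container moves from CPU 3 to CPU 7; with 3 on
    # CPU 7 it stays where it is.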
    my $find_best_cpu = sub {
        my ($cpulist, $cpu) = @_;

        my $cur_cost = $cpu_ctcount[$cpu];
        my $cur_cpu = $cpu;

        foreach my $candidate (@$cpulist) {
            my $cost = $cpu_ctcount[$candidate];
            if ($cost < ($cur_cost - 1)) {
                $cur_cost = $cost;
                $cur_cpu = $candidate;
            }
        }

        return $cur_cpu;
    };

    foreach my $bct (@balanced_cts) {
        my ($vmid, $cores, $cpuset) = @$bct;

        my $newset = PVE::CpuSet->new();

        my $rest = [];
        foreach my $cpu (@allowed_cpus) {
            next if $cpuset->has($cpu);
            push @$rest, $cpu;
        }

        my @members = $cpuset->members();
        foreach my $cpu (@members) {
            my $best = &$find_best_cpu($rest, $cpu);
            if ($best != $cpu) {
                $cpu_ctcount[$best]++;
                $cpu_ctcount[$cpu]--;
            }
            $newset->insert($best);
        }

        if (!$newset->is_equal($cpuset)) {
            $modify_cpuset->($vmid, $cpuset, $newset);
        }
    }
}

sub update_lxc_status {
    my ($status_cfg) = @_;

    my $ctime = time();

    my $vmstatus = PVE::LXC::vmstatus();

    foreach my $vmid (keys %$vmstatus) {
        my $d = $vmstatus->{$vmid};
        my $template = $d->{template} ? $d->{template} : "0";
        my $data;
        if ($d->{status} eq 'running') { # running
            $data = $generate_rrd_string->(
                [$d->{uptime}, $d->{name}, $d->{status}, $template,
                 $ctime, $d->{cpus}, $d->{cpu},
                 $d->{maxmem}, $d->{mem},
                 $d->{maxdisk}, $d->{disk},
                 $d->{netin}, $d->{netout},
                 $d->{diskread}, $d->{diskwrite}]);
        } else {
            $data = $generate_rrd_string->(
                [0, $d->{name}, $d->{status}, $template, $ctime, $d->{cpus}, undef,
                 $d->{maxmem}, undef, $d->{maxdisk}, $d->{disk}, undef, undef, undef, undef]);
        }
        PVE::Cluster::broadcast_rrd("pve2.3-vm/$vmid", $data);

        foreach my $id (keys %{$status_cfg->{ids}}) {
            my $plugin_config = $status_cfg->{ids}->{$id};
            next if $plugin_config->{disable};
            my $plugin = PVE::Status::Plugin->lookup($plugin_config->{type});
            $plugin->update_lxc_status($plugin_config, $vmid, $d, $ctime, $nodename);
        }
    }
}

sub update_storage_status {
    my ($status_cfg) = @_;

    my $cfg = PVE::Storage::config();

    my $ctime = time();

    my $info = PVE::Storage::storage_info($cfg);

    foreach my $storeid (keys %$info) {
        my $d = $info->{$storeid};
        next if !$d->{active};

        my $data = $generate_rrd_string->([$ctime, $d->{total}, $d->{used}]);

        my $key = "pve2-storage/${nodename}/$storeid";
        PVE::Cluster::broadcast_rrd($key, $data);

        foreach my $id (keys %{$status_cfg->{ids}}) {
            my $plugin_config = $status_cfg->{ids}->{$id};
            next if $plugin_config->{disable};
            my $plugin = PVE::Status::Plugin->lookup($plugin_config->{type});
            $plugin->update_storage_status($plugin_config, $nodename, $storeid, $d, $ctime);
        }
    }
}

sub update_status {

    # Update the worker list. This is not strictly required; we just call it
    # to make sure we have a correct list in case of an unexpected crash.
    my $rpcenv = PVE::RPCEnvironment::get();

    eval {
        my $tlist = $rpcenv->active_workers();
        PVE::Cluster::broadcast_tasklist($tlist);
    };
    my $err = $@;
    syslog('err', $err) if $err;

    my $status_cfg = PVE::Cluster::cfs_read_file('status.cfg');
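    # status.cfg describes the optional external metric servers. Each entry in
    # $status_cfg->{ids} carries at least a 'type' (matching one of the
    # Graphite/InfluxDB plugins registered at the top of this module) plus
    # plugin-specific options; entries with 'disable' set are skipped by the
    # update_* subs above. The exact option names live in the PVE::Status::*
    # plugins, not here.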

    eval {
        update_node_status($status_cfg);
    };
    $err = $@;
    syslog('err', "node status update error: $err") if $err;

    eval {
        update_qemu_status($status_cfg);
    };
    $err = $@;
    syslog('err', "qemu status update error: $err") if $err;

    eval {
        update_lxc_status($status_cfg);
    };
    $err = $@;
    syslog('err', "lxc status update error: $err") if $err;

    eval {
        rebalance_lxc_containers();
    };
    $err = $@;
    syslog('err', "lxc cpuset rebalance error: $err") if $err;

    eval {
        update_storage_status($status_cfg);
    };
    $err = $@;
    syslog('err', "storage status update error: $err") if $err;

    eval {
        remove_stale_lxc_consoles();
    };
    $err = $@;
    syslog('err', "lxc console cleanup error: $err") if $err;
}

my $next_update = 0;

# do not update directly after startup, because install scripts
# have a problem with that
my $cycle = 0;
my $updatetime = 10;

my $initial_memory_usage;

sub run {
    my ($self) = @_;

    for (;;) { # forever

        $next_update = time() + $updatetime;

        if ($cycle) {
            my ($ccsec, $cusec) = gettimeofday();
            eval {
                # syslog('info', "start status update");
                PVE::Cluster::cfs_update();
                update_status();
            };
            my $err = $@;

            if ($err) {
                syslog('err', "status update error: $err");
            }

            my ($ccsec_end, $cusec_end) = gettimeofday();
            my $cptime = ($ccsec_end - $ccsec) + ($cusec_end - $cusec)/1000000;

            syslog('info', sprintf("status update time (%.3f seconds)", $cptime))
                if ($cptime > 5);
        }

        $cycle++;

        my $mem = PVE::ProcFSTools::read_memory_usage();

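        # Memory-growth watchdog. Illustrative numbers (not from the source):
        # if the resident set size settled around 40 MiB during the first
        # cycles and later exceeds 45 MiB (more than 5 MiB above the recorded
        # baseline), the daemon restarts itself to shed the extra memory.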
        if (!defined($initial_memory_usage) || ($cycle < 10)) {
            $initial_memory_usage = $mem->{resident};
        } else {
            my $diff = $mem->{resident} - $initial_memory_usage;
            if ($diff > 5*1024*1024) {
                syslog('info', "restarting server after $cycle cycles to " .
                       "reduce memory usage (resident $mem->{resident} bytes, $diff bytes above baseline)");
                $self->restart_daemon();
            }
        }

        my $wcount = 0;
        while ((time() < $next_update) &&
               ($wcount < $updatetime) && # protect against time wrap
               !$restart_request) { $wcount++; sleep(1); };

        $self->restart_daemon() if $restart_request;
    }
}

$daemon->register_start_command();
$daemon->register_restart_command(1);
$daemon->register_stop_command();
$daemon->register_status_command();

our $cmddef = {
    start => [ __PACKAGE__, 'start', []],
    restart => [ __PACKAGE__, 'restart', []],
    stop => [ __PACKAGE__, 'stop', []],
    status => [ __PACKAGE__, 'status', [], undef, sub { print shift . "\n"; } ],
};
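
# $cmddef is the subcommand table for the command-line front-end (assumption:
# it is consumed by the standard PVE CLI handler, as in other PVE daemons).
# Typical invocations, for illustration:
#
#   pvestatd start
#   pvestatd status
#   pvestatd restart
#   pvestatd stop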

1;