5 use POSIX
":sys_wait_h";
8 use Time
::HiRes qw
(gettimeofday
);
9 use PVE
::Tools
qw(dir_glob_foreach file_read_firstline);
13 use PVE
::Cluster
qw(cfs_read_file);
17 use PVE
::RPCEnvironment
;
18 use PVE
::API2
::Subscription
;
# --- daemon bootstrap (fragmented listing: original lines 21-23, 25-29,
# 31, 33 and 35-37 are not visible in this chunk) ---
# Forward Perl warnings to syslog; $t presumably holds the warning text
# (its assignment sits in the missing lines) -- TODO confirm.
20 $SIG{'__WARN__'} = sub {
24 syslog
('warning', "WARNING: %s", $t);
# Pin a minimal, trusted PATH for a root daemon.
30 $ENV{'PATH'} = '/sbin:/bin:/usr/sbin:/usr/bin';
# Effective UID ($>) must be root.
32 die "please run as root\n" if $> != 0;
# Cluster node name, used later as the RRD key suffix for node records.
34 my $nodename = PVE
::INotify
::nodename
();
# Only supported CLI switch is --debug.
38 if (!GetOptions
('debug' => \
$opt_debug)) {
39 die "USAGE: $0 [--debug]\n";
42 my $opt_pidfile = "/var/run/pvestatd.pid";
# Lock-file guard against running two daemon instances concurrently.
46 my $lkfn = "$pidfile.lock";
# NOTE(review): bareword handle + 2-arg open; modern Perl prefers
# "open my $fh, '>>', $lkfn" -- left untouched in this fragment.
48 if (!open (FLCK
, ">>$lkfn")) {
# NOTE(review): typo in the runtime message -- "aquire" should be
# "acquire" (also at original line 56 below).
49 my $msg = "can't aquire lock on file '$lkfn' - $!";
# Non-blocking exclusive lock: fail fast if another instance holds it.
54 if (!flock (FLCK
, LOCK_EX
|LOCK_NB
)) {
56 my $msg = "can't aquire lock '$lkfn' - $!";
65 if (!open (PIDFH
, ">$pidfile")) {
66 my $msg = "can't open pid file '$pidfile' - $!";
# Acquire the pid-file lock before daemonizing.
75 lockpidfile
($opt_pidfile);
# Set by restart_server() before exec'ing ourselves, so the restarted
# daemon skips the fork/daemonize step below.
80 my $restart = $ENV{RESTART_PVESTATD
};
# NOTE(review): '||' binds to the filename string, not to open(), so
# these die checks can never fire; should be low-precedence 'or'
# (same problem at original line 100 below).
83 open STDIN
, '</dev/null' || die "can't read /dev/null";
84 open STDOUT
, '>/dev/null' || die "can't write /dev/null";
# Daemonize via fork unless restarting or running in --debug mode.
87 if (!$restart && !$opt_debug) {
89 if (!defined ($spid)) {
90 my $msg = "can't put server into background - fork failed";
93 } elsif ($spid) { #parent
98 writepidfile
($opt_pidfile);
100 open STDERR
, '>&STDOUT' || die "can't close STDERR\n";
# Remove pid and lock files on the usual termination signals.
103 unlink "$opt_pidfile.lock";
104 unlink "$opt_pidfile";
107 $SIG{INT
} = $SIG{TERM
} = $SIG{QUIT
} = sub {
108 syslog
('info' , "server closing");
110 $SIG{INT
} = 'DEFAULT';
# Reap any remaining children (non-blocking) before exiting.
113 1 while (waitpid(-1, POSIX
::WNOHANG
()) > 0);
120 PVE
::INotify
::inotify_init
();
125 syslog
('info' , "restarting server");
127 syslog
('info' , "starting server");
# Collect node-level statistics (load, CPU, network, memory, root disk,
# subscription level) and broadcast them as one pve2-node RRD record.
# (Fragment: several original lines, including the closing brace, are
# missing from this view.)
134 sub update_node_status
{
136 my ($avg1, $avg5, $avg15) = PVE
::ProcFSTools
::read_loadavg
();
138 my $stat = PVE
::ProcFSTools
::read_proc_stat
();
140 my $netdev = PVE
::ProcFSTools
::read_proc_net_dev
();
142 my ($uptime) = PVE
::ProcFSTools
::read_proc_uptime
();
144 my $cpuinfo = PVE
::ProcFSTools
::read_cpuinfo
();
146 my $maxcpu = $cpuinfo->{cpus
};
# Subscription level ('' when unset) is folded into the record.
148 my $subinfo = PVE
::INotify
::read_file
('subscription');
149 my $sublevel = $subinfo->{level
} || '';
151 # traffic from/to physical interface cards
# Only ethN devices count; bridges/bonds/virtual devices are skipped.
154 foreach my $dev (keys %$netdev) {
155 next if $dev !~ m/^eth\d+$/;
156 $netin += $netdev->{$dev}->{receive
};
157 $netout += $netdev->{$dev}->{transmit
};
160 my $meminfo = PVE
::ProcFSTools
::read_meminfo
();
162 my $dinfo = df
('/', 1); # output is bytes
166 # everything not free is considered to be used
167 my $dused = $dinfo->{blocks
} - $dinfo->{bfree
};
# Colon-separated field order must match the pve2-node RRD schema.
169 my $data = "$uptime:$sublevel:$ctime:$avg1:$maxcpu:$stat->{cpu}:$stat->{wait}:" .
170 "$meminfo->{memtotal}:$meminfo->{memused}:" .
171 "$meminfo->{swaptotal}:$meminfo->{swapused}:" .
172 "$dinfo->{blocks}:$dused:$netin:$netout";
174 PVE
::Cluster
::broadcast_rrd
("pve2-node/$nodename", $data);
# --- auto-ballooning body (fragment: the enclosing sub's header and a
# number of interior lines are missing from this view) ---
# Currently only active in debug mode.
181 return if !$opt_debug;
184 my $hostmeminfo = PVE
::ProcFSTools
::read_meminfo
();
186 # to debug, run 'pvestatd -d' and set memtotal here
187 #$hostmeminfo->{memtotal} = int(3*1024*1024*1024/0.8); # you can set this to test
189 my $hostfreemem = $hostmeminfo->{memtotal
} - $hostmeminfo->{memused
};
191 # we try to use about 80% host memory
192 # goal: we want to change memory usage by this amount (positive or negative)
193 my $goal = int($hostmeminfo->{memtotal
}*0.8 - $hostmeminfo->{memused
});
195 &$log("host goal: $goal free: $hostfreemem total: $hostmeminfo->{memtotal}\n");
# Cap any single balloon adjustment at 100 MiB per cycle.
197 my $maxchange = 100*1024*1024;
# Summarize shares / free-shares / allocation over a VM id list;
# unset shares default to 1000.
199 my $get_summary = sub {
206 foreach my $vmid (@$idlist) {
207 my $d = $vmstatus->{$vmid};
208 $shares += $d->{shares
} || 1000;
209 $freeshares += 1/($d->{shares
} || 1000);
210 if ($d->{balloon
} > $d->{balloon_min
}) { # just to be sure
211 $alloc += $d->{balloon
} - $d->{balloon_min
}
213 if ($d->{maxmem
} > $d->{balloon
}) { # just to be sure
214 $free += $d->{maxmem
} - $d->{balloon
};
217 return ($shares, $freeshares, $alloc, $free);
# Distribute up to $bytes of balloon growth across VMs, weighted by
# inverse shares; fills $res with per-VM target values.
220 my $grow_func = sub {
221 my ($res, $idlist, $bytes) = @_;
224 my (undef, $shares_total, undef, $free_total) = &$get_summary($idlist);
225 return $changes if !$shares_total;
227 &$log("grow $goal\n");
229 my $target = $bytes < $free_total ?
$free_total - $bytes : 0;
230 &$log("shares_total: $shares_total\n");
231 &$log("free_total: $free_total\n");
232 &$log("target: $target\n");
234 foreach my $vmid (@$idlist) {
235 my $d = $vmstatus->{$vmid};
236 my $shares = 1/($d->{shares
} || 1000);
237 &$log("shares $vmid: $shares\n");
238 next if $shares < 0; # just to be sure
239 my $max = $d->{maxmem
} - int(($target/$shares_total)*$shares);
240 $max = $d->{balloon_min
} if $max < $d->{balloon_min
};
# Grow by at most $maxchange, and never past the computed $max.
241 my $new = $d->{balloon
} + $maxchange;
242 my $balloon = $new > $max ?
$max : $new;
243 my $diff = $balloon - $d->{balloon
};
245 $res->{$vmid} = $balloon;
247 &$log("grow request for $vmid ($res->{$vmid}, $diff, $max, $new)\n");
253 my $idlist = []; # list of VMs with working balloon driver
254 my $idlist1 = []; # list of VMs with memory pressure
255 my $idlist2 = []; # list of VMs with enough free memory
257 foreach my $vmid (keys %$vmstatus) {
258 my $d = $vmstatus->{$vmid};
259 next if !$d->{balloon
}; # skip if balloon driver not running
260 next if !$d->{balloon_min
}; # skip if balloon value not set in config
262 push @$idlist, $vmid;
# "Enough free" = guest reports more than 25% of maxmem free and the
# balloon is already at or above its configured minimum.
264 if (($goal > 0) && $d->{freemem
} &&
265 ($d->{freemem
} > $d->{maxmem
}*0.25) &&
266 ($d->{balloon
} >= $d->{balloon_min
})) {
267 push @$idlist2, $vmid;
268 &$log("idlist2 $vmid $d->{balloon}, $d->{balloon_min}, $d->{freemem}\n");
270 push @$idlist1, $vmid;
271 &$log("idlist1 $vmid $d->{balloon}, $d->{balloon_min}, $d->{freemem}\n");
# Hysteresis: only act on changes larger than 10 MiB either way.
277 if ($goal > 10*1024*1024) {
278 &$log("grow request $goal\n");
279 # we prioritize VMs with memory pressure
280 if (!&$grow_func($res, $idlist1, $goal)) {
281 &$grow_func($res, $idlist2, $goal);
283 } elsif ($goal < -10*1024*1024) {
284 &$log("shrink request $goal\n");
285 my ($shares_total, undef, $alloc_old) = &$get_summary($idlist);
286 my $alloc_new = $alloc_old + $goal;
287 $alloc_new = 0 if $alloc_new < 0;
288 &$log("shares_total: $shares_total $alloc_new\n");
# Shrink each VM toward balloon_min plus its share-weighted quota.
290 foreach my $vmid (@$idlist) {
291 my $d = $vmstatus->{$vmid};
292 my $shares = $d->{shares
} || 1000;
293 next if $shares < 0; # just to be sure
294 my $min = $d->{balloon_min
} + int(($alloc_new/$shares_total)*$shares);
295 my $new = $d->{balloon
} - $maxchange;
296 $res->{$vmid} = $new > $min ?
$new : $min;
299 &$log("do nothing\n");
300 # do nothing - requested change too small
# Apply the computed balloon targets via the QEMU monitor.
303 foreach my $vmid (@$idlist) {
304 next if !$res->{$vmid};
305 my $d = $vmstatus->{$vmid};
306 my $diff = int($res->{$vmid} - $d->{balloon
});
307 my $absdiff = $diff < 0 ?
-$diff : $diff;
309 &$log("BALLOON $vmid to $res->{$vmid} ($diff)\n");
311 PVE
::QemuServer
::vm_mon_cmd
($vmid, "balloon",
312 value
=> int($res->{$vmid}));
# Gather status for all QEMU VMs, run auto-ballooning, and broadcast a
# pve2-vm RRD record per VM. (Fragment: closing lines not visible.)
319 sub update_qemu_status
{
323 my $vmstatus = PVE
::QemuServer
::vmstatus
(undef, 1);
# Ballooning errors must not break status broadcasting, hence the eval.
# NOTE(review): "auto_balloning" is misspelled; its definition is not
# visible here, so rename both sites together or neither.
325 eval { auto_balloning
($vmstatus); };
326 syslog
('err', "auto ballooning error: $@") if $@;
328 foreach my $vmid (keys %$vmstatus) {
329 my $d = $vmstatus->{$vmid};
# Running VMs get full stats; stopped ones a mostly-zero record.
331 if ($d->{pid
}) { # running
332 $data = "$d->{uptime}:$d->{name}:$ctime:$d->{cpus}:$d->{cpu}:" .
333 "$d->{maxmem}:$d->{mem}:" .
334 "$d->{maxdisk}:$d->{disk}:" .
335 "$d->{netin}:$d->{netout}:" .
336 "$d->{diskread}:$d->{diskwrite}";
338 $data = "0:$d->{name}:$ctime:$d->{cpus}::" .
340 "$d->{maxdisk}:$d->{disk}:" .
343 PVE
::Cluster
::broadcast_rrd
("pve2-vm/$vmid", $data);
# Scan /proc for running 'vzctl console <vmid>' processes and collect a
# vmid -> [pids] map. (Fragment: result initialization and the return
# statement are not visible in this view.)
347 sub find_vzctl_console_pids
{
351 dir_glob_foreach
('/proc', '\d+', sub {
354 my $cmdline = file_read_firstline
("/proc/$pid/cmdline");
# /proc/<pid>/cmdline fields are NUL-separated.
357 my @args = split(/\0/, $cmdline);
359 # search for vzctl console <vmid>
360 return if scalar(@args) != 3;
361 return if $args[1] ne 'console';
362 return if $args[2] !~ m/^\d+$/;
363 return if $args[0] !~ m
|^(/usr/sbin
/)?vzctl
$|;
367 push @{$res->{$vmid}}, $pid;
# Clean up vzctl console sessions whose container no longer exists.
# (Fragment: the per-pid loop body is not visible in this view.)
372 sub remove_stale_openvz_consoles
{
374 my $vmstatus = PVE
::OpenVZ
::vmstatus
();
375 my $pidhash = find_vzctl_console_pids
();
# Any console pid whose vmid is unknown to vmstatus is stale.
377 foreach my $vmid (keys %$pidhash) {
378 next if defined($vmstatus->{$vmid});
379 syslog
('info', "remove stale vzctl console for CT $vmid");
380 foreach my $pid (@{$pidhash->{$vmid}}) {
# Broadcast a pve2-vm RRD record per OpenVZ container; mirrors
# update_qemu_status. (Fragment: closing lines not visible.)
386 sub update_openvz_status
{
390 my $vmstatus = PVE
::OpenVZ
::vmstatus
();
392 foreach my $vmid (keys %$vmstatus) {
393 my $d = $vmstatus->{$vmid};
# Running CTs get full stats; stopped ones a mostly-zero record.
395 if ($d->{status
} eq 'running') { # running
396 $data = "$d->{uptime}:$d->{name}:$ctime:$d->{cpus}:$d->{cpu}:" .
397 "$d->{maxmem}:$d->{mem}:" .
398 "$d->{maxdisk}:$d->{disk}:" .
399 "$d->{netin}:$d->{netout}:" .
400 "$d->{diskread}:$d->{diskwrite}";
402 $data = "0:$d->{name}:$ctime:$d->{cpus}::" .
404 "$d->{maxdisk}:$d->{disk}:" .
407 PVE
::Cluster
::broadcast_rrd
("pve2-vm/$vmid", $data);
# Broadcast usage of every active storage as a pve2-storage RRD record.
# (Fragment: closing lines not visible.)
411 sub update_storage_status
{
413 my $cfg = cfs_read_file
("storage.cfg");
417 my $info = PVE
::Storage
::storage_info
($cfg);
419 foreach my $storeid (keys %$info) {
420 my $d = $info->{$storeid};
# Skip storages that are inactive (disabled or unreachable).
421 next if !$d->{active
};
423 # everything not free is considered to be used
424 my $realused = $d->{total
} - $d->{avail
};
426 my $data = "$ctime:$d->{total}:$realused";
# Storage RRD keys are namespaced per node.
428 my $key = "pve2-storage/${nodename}/$storeid";
429 PVE
::Cluster
::broadcast_rrd
($key, $data);
435 # update worker list. This is not really required and
436 # we just call this to make sure that we have a correct
437 # list in case of an unexpected crash.
439 my $tlist = PVE
::RPCEnvironment
::active_workers
();
440 PVE
::Cluster
::broadcast_tasklist
($tlist);
443 syslog
('err', $err) if $err;
# Each updater runs independently; one failing must not stop the rest
# (the eval wrappers appear to sit in lines missing from this view --
# TODO confirm).
446 update_node_status
();
449 syslog
('err', "node status update error: $err") if $err;
452 update_qemu_status
();
455 syslog
('err', "qemu status update error: $err") if $err;
458 update_openvz_status
();
461 syslog
('err', "openvz status update error: $err") if $err;
464 update_storage_status
();
467 syslog
('err', "storage status update error: $err") if $err;
470 remove_stale_openvz_consoles
();
473 syslog
('err', "openvz console cleanup error: $err") if $err;
478 # do not update directly after startup, because install scripts
479 # have a problem with that
# Restart path: re-exec ourselves with the original arguments.
483 my $commandline = [$0, @ARGV];
488 my $waittime = shift;
490 syslog
('info', "server shutdown (restart)");
# Flag the next incarnation to skip daemonizing (see $restart above).
492 $ENV{RESTART_PVESTATD
} = 1;
494 sleep($waittime) if $waittime; # avoid high server load due to restarts
496 exec (@$commandline);
497 exit (-1); # never reached?
# --- main loop (fragment: loop header and several lines not visible) ---
# Baseline RSS, (re)sampled during the first cycles, used below to
# detect memory growth.
500 my $initial_memory_usage;
505 $next_update = time() + $updatetime;
# Time one full update cycle for the debug log line below.
508 my ($ccsec, $cusec) = gettimeofday
();
511 # syslog('info', "start status update");
# Refresh cluster filesystem state before collecting anything.
512 PVE
::Cluster
::cfs_update
();
518 syslog
('err', "status update error: $err");
521 my ($ccsec_end, $cusec_end) = gettimeofday
();
522 my $cptime = ($ccsec_end-$ccsec) + ($cusec_end - $cusec)/1000000;
524 syslog
('info', sprintf("status update time (%.3f seconds)", $cptime))
# Restart the daemon if resident memory grew more than 5 MiB over the
# baseline -- a leak mitigation, not a fix.
530 my $mem = PVE
::ProcFSTools
::read_memory_usage
();
532 if (!defined($initial_memory_usage) || ($cycle < 10)) {
533 $initial_memory_usage = $mem->{resident
};
535 my $diff = $mem->{resident
} - $initial_memory_usage;
536 if ($diff > 5*1024*1024) {
537 syslog
('info', "restarting server after $cycle cycles to " .
538 "reduce memory usage (free $mem->{resident} ($diff) bytes)");
# Sleep in 1-second steps until the next update is due; $wcount bounds
# the wait against clock jumps, $reload_config breaks out early.
544 while ((time() < $next_update) &&
545 ($wcount < $updatetime) && # protect against time wrap
546 !$reload_config) { $wcount++; sleep (1); };
552 syslog
('err', "ERROR: $err");
564 pvestatd - PVE Status Daemon
572 Documentation is available at www.proxmox.com.