#!/usr/bin/perl -w

use strict;
use PVE::SafeSyslog;
use POSIX ":sys_wait_h";
use Fcntl ':flock';
use Getopt::Long;
use Time::HiRes qw (gettimeofday);
use PVE::Tools qw(dir_glob_foreach file_read_firstline);
use PVE::ProcFSTools;
use Filesys::Df;
use PVE::INotify;
use PVE::Cluster qw(cfs_read_file);
use PVE::Storage;
use PVE::QemuServer;
use PVE::OpenVZ;
use PVE::RPCEnvironment;
use PVE::API2::Subscription;
use PVE::AutoBalloon;

$SIG{'__WARN__'} = sub {
    my $err = $@;
    my $t = $_[0];
    chomp $t;
    syslog('warning', "WARNING: %s", $t);
    $@ = $err;
};

initlog('pvestatd');

$ENV{'PATH'} = '/sbin:/bin:/usr/sbin:/usr/bin';

die "please run as root\n" if $> != 0;

my $nodename = PVE::INotify::nodename();

my $opt_debug;

if (!GetOptions ('debug' => \$opt_debug)) {
    die "USAGE: $0 [--debug]\n";
}

my $opt_pidfile = "/var/run/pvestatd.pid";

sub lockpidfile {
    my $pidfile = shift;
    my $lkfn = "$pidfile.lock";

    if (!open (FLCK, ">>$lkfn")) {
        my $msg = "can't acquire lock on file '$lkfn' - $!";
        syslog ('err', $msg);
        die "ERROR: $msg\n";
    }

    if (!flock (FLCK, LOCK_EX|LOCK_NB)) {
        close (FLCK);
        my $msg = "can't acquire lock '$lkfn' - $!";
        syslog ('err', $msg);
        die "ERROR: $msg\n";
    }
}

sub writepidfile {
    my $pidfile = shift;

    if (!open (PIDFH, ">$pidfile")) {
        my $msg = "can't open pid file '$pidfile' - $!";
        syslog ('err', $msg);
        die "ERROR: $msg\n";
    }
    print PIDFH "$$\n";
    close (PIDFH);
}

# try to get the lock
lockpidfile($opt_pidfile);

# run in background
my $spid;

my $restart = $ENV{RESTART_PVESTATD};

if (!$opt_debug) {
    open STDIN, '</dev/null' or die "can't read /dev/null";
    open STDOUT, '>/dev/null' or die "can't write /dev/null";
}

if (!$restart && !$opt_debug) {
    $spid = fork();
    if (!defined ($spid)) {
        my $msg = "can't put server into background - fork failed";
        syslog('err', $msg);
        die "ERROR: $msg\n";
    } elsif ($spid) { # parent
        exit (0);
    }
}

writepidfile($opt_pidfile);

open STDERR, '>&STDOUT' or die "can't redirect STDERR\n";

sub cleanup {
    unlink "$opt_pidfile.lock";
    unlink "$opt_pidfile";
}

$SIG{INT} = $SIG{TERM} = $SIG{QUIT} = sub {
    syslog('info', "server closing");

    $SIG{INT} = 'DEFAULT';

    # wait for children
    1 while (waitpid(-1, POSIX::WNOHANG()) > 0);

    cleanup();

    exit (0);
};

PVE::INotify::inotify_init();

my $reload_config;

if ($restart) {
    syslog('info', "restarting server");
} else {
    syslog('info', "starting server");
}

$SIG{HUP} = sub {
    $reload_config = 1;
};

sub update_node_status {

    my ($avg1, $avg5, $avg15) = PVE::ProcFSTools::read_loadavg();

    my $stat = PVE::ProcFSTools::read_proc_stat();

    my $netdev = PVE::ProcFSTools::read_proc_net_dev();

    my ($uptime) = PVE::ProcFSTools::read_proc_uptime();

    my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();

    my $maxcpu = $cpuinfo->{cpus};

    my $subinfo = PVE::INotify::read_file('subscription');
    my $sublevel = $subinfo->{level} || '';

    # traffic from/to physical interface cards
    my $netin = 0;
    my $netout = 0;
    foreach my $dev (keys %$netdev) {
        next if $dev !~ m/^eth\d+$/;
        $netin += $netdev->{$dev}->{receive};
        $netout += $netdev->{$dev}->{transmit};
    }

    my $meminfo = PVE::ProcFSTools::read_meminfo();

    my $dinfo = df('/', 1); # output is bytes

    my $ctime = time();

    # everything not free is considered to be used
    my $dused = $dinfo->{blocks} - $dinfo->{bfree};

    my $data = "$uptime:$sublevel:$ctime:$avg1:$maxcpu:$stat->{cpu}:$stat->{wait}:" .
        "$meminfo->{memtotal}:$meminfo->{memused}:" .
        "$meminfo->{swaptotal}:$meminfo->{swapused}:" .
        "$dinfo->{blocks}:$dused:$netin:$netout";

    PVE::Cluster::broadcast_rrd("pve2-node/$nodename", $data);
}

sub auto_balloning {
    my ($vmstatus) = @_;

    my $log = sub {
        return if !$opt_debug;
        print @_;
    };

    my $hostmeminfo = PVE::ProcFSTools::read_meminfo();

    # to debug, run 'pvestatd -d' and set memtotal here
    #$hostmeminfo->{memtotal} = int(2*1024*1024*1024/0.8); # you can set this to test

    my $hostfreemem = $hostmeminfo->{memtotal} - $hostmeminfo->{memused};

    # we try to use about 80% of the host memory
    # goal: we want to change memory usage by this amount (positive or negative)
    my $goal = int($hostmeminfo->{memtotal}*0.8 - $hostmeminfo->{memused});

    my $maxchange = 100*1024*1024;
    my $res = PVE::AutoBalloon::compute_alg1($vmstatus, $goal, $maxchange);
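
    # Worked example (illustrative numbers only, not taken from a real host):
    # with memtotal = 16 GiB and memused = 14 GiB we get
    #   goal = int(16 GiB * 0.8 - 14 GiB) = -1.2 GiB
    # so the balloon targets of the running VMs are lowered; a host using
    # only 10 GiB instead would get goal = +2.8 GiB and the VMs may grow
    # again. compute_alg1() distributes the goal over the VMs, with changes
    # bounded by $maxchange (100 MiB) per status cycle.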

    &$log("host goal: $goal free: $hostfreemem total: $hostmeminfo->{memtotal}\n");

    foreach my $vmid (keys %$vmstatus) {
        next if !$res->{$vmid};
        my $d = $vmstatus->{$vmid};
        my $diff = int($res->{$vmid} - $d->{balloon});
        my $absdiff = $diff < 0 ? -$diff : $diff;
        if ($absdiff > 0) {
            &$log("BALLOON $vmid to $res->{$vmid} ($diff)\n");
            eval {
                PVE::QemuServer::vm_mon_cmd($vmid, "balloon",
                                            value => int($res->{$vmid}));
            };
            warn $@ if $@;
        }
    }
}

sub update_qemu_status {

    my $ctime = time();

    my $vmstatus = PVE::QemuServer::vmstatus(undef, 1);

    eval { auto_balloning($vmstatus); };
    syslog('err', "auto ballooning error: $@") if $@;

    foreach my $vmid (keys %$vmstatus) {
        my $d = $vmstatus->{$vmid};
        my $data;
        if ($d->{pid}) { # running
            $data = "$d->{uptime}:$d->{name}:$ctime:$d->{cpus}:$d->{cpu}:" .
                "$d->{maxmem}:$d->{mem}:" .
                "$d->{maxdisk}:$d->{disk}:" .
                "$d->{netin}:$d->{netout}:" .
                "$d->{diskread}:$d->{diskwrite}";
        } else {
            $data = "0:$d->{name}:$ctime:$d->{cpus}::" .
                "$d->{maxmem}::" .
                "$d->{maxdisk}:$d->{disk}:" .
                ":::";
        }
        PVE::Cluster::broadcast_rrd("pve2-vm/$vmid", $data);
    }
}

sub find_vzctl_console_pids {

    my $res = {};

    dir_glob_foreach('/proc', '\d+', sub {
        my ($pid) = @_;

        my $cmdline = file_read_firstline("/proc/$pid/cmdline");
        return if !$cmdline;

        my @args = split(/\0/, $cmdline);

        # search for 'vzctl console <vmid>' processes
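        # a matching cmdline looks like this (hypothetical CT id 101; the
        # fields in /proc/<pid>/cmdline are NUL separated):
        #   /usr/sbin/vzctl\0console\0101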
        return if scalar(@args) != 3;
        return if $args[1] ne 'console';
        return if $args[2] !~ m/^\d+$/;
        return if $args[0] !~ m|^(/usr/sbin/)?vzctl$|;

        my $vmid = $args[2];

        push @{$res->{$vmid}}, $pid;
    });

    return $res;
}

sub remove_stale_openvz_consoles {

    my $vmstatus = PVE::OpenVZ::vmstatus();
    my $pidhash = find_vzctl_console_pids();

    foreach my $vmid (keys %$pidhash) {
        next if defined($vmstatus->{$vmid});
        syslog('info', "remove stale vzctl console for CT $vmid");
        foreach my $pid (@{$pidhash->{$vmid}}) {
            kill(9, $pid);
        }
    }
}

sub update_openvz_status {

    my $ctime = time();

    my $vmstatus = PVE::OpenVZ::vmstatus();

    foreach my $vmid (keys %$vmstatus) {
        my $d = $vmstatus->{$vmid};
        my $data;
        if ($d->{status} eq 'running') { # running
            $data = "$d->{uptime}:$d->{name}:$ctime:$d->{cpus}:$d->{cpu}:" .
                "$d->{maxmem}:$d->{mem}:" .
                "$d->{maxdisk}:$d->{disk}:" .
                "$d->{netin}:$d->{netout}:" .
                "$d->{diskread}:$d->{diskwrite}";
        } else {
            $data = "0:$d->{name}:$ctime:$d->{cpus}::" .
                "$d->{maxmem}::" .
                "$d->{maxdisk}:$d->{disk}:" .
                ":::";
        }
        PVE::Cluster::broadcast_rrd("pve2-vm/$vmid", $data);
    }
}

sub update_storage_status {

    my $cfg = cfs_read_file("storage.cfg");

    my $ctime = time();

    my $info = PVE::Storage::storage_info($cfg);

    foreach my $storeid (keys %$info) {
        my $d = $info->{$storeid};
        next if !$d->{active};

        # everything not free is considered to be used
        my $realused = $d->{total} - $d->{avail};

        my $data = "$ctime:$d->{total}:$realused";

        my $key = "pve2-storage/${nodename}/$storeid";
        PVE::Cluster::broadcast_rrd($key, $data);
    }
}

sub update_status {

    # update worker list. This is not really required and
    # we just call this to make sure that we have a correct
    # list in case of an unexpected crash.
    eval {
        my $tlist = PVE::RPCEnvironment::active_workers();
        PVE::Cluster::broadcast_tasklist($tlist);
    };
    my $err = $@;
    syslog('err', $err) if $err;

    eval {
        update_node_status();
    };
    $err = $@;
    syslog('err', "node status update error: $err") if $err;

    eval {
        update_qemu_status();
    };
    $err = $@;
    syslog('err', "qemu status update error: $err") if $err;

    eval {
        update_openvz_status();
    };
    $err = $@;
    syslog('err', "openvz status update error: $err") if $err;

    eval {
        update_storage_status();
    };
    $err = $@;
    syslog('err', "storage status update error: $err") if $err;

    eval {
        remove_stale_openvz_consoles();
    };
    $err = $@;
    syslog('err', "openvz console cleanup error: $err") if $err;
}

my $next_update = 0;

# do not update directly after startup, because install scripts
# have a problem with that
my $cycle = 0;
my $updatetime = 10;

my $commandline = [$0, @ARGV];

$0 = "pvestatd";

sub restart_server {
    my $waittime = shift;

    syslog('info', "server shutdown (restart)");

    $ENV{RESTART_PVESTATD} = 1;

    sleep($waittime) if $waittime; # avoid high server load due to restarts

    exec (@$commandline);
    exit (-1); # never reached?
}

my $initial_memory_usage;

for (;;) { # forever

    eval {
        $next_update = time() + $updatetime;

        if ($cycle) {
            my ($ccsec, $cusec) = gettimeofday ();
            eval {
                $reload_config = 0;
                # syslog('info', "start status update");
                PVE::Cluster::cfs_update();
                update_status();
            };
            my $err = $@;

            if ($err) {
                syslog('err', "status update error: $err");
            }

            my ($ccsec_end, $cusec_end) = gettimeofday ();
            my $cptime = ($ccsec_end-$ccsec) + ($cusec_end - $cusec)/1000000;

            syslog('info', sprintf("status update time (%.3f seconds)", $cptime))
                if ($cptime > 5);
        }

        $cycle++;

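        # self-restart heuristic: the resident set size seen during the
        # first ten cycles is kept as a baseline; once it grows more than
        # 5 MiB above that, the daemon re-execs itself to reduce memory usage.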
        my $mem = PVE::ProcFSTools::read_memory_usage();

        if (!defined($initial_memory_usage) || ($cycle < 10)) {
            $initial_memory_usage = $mem->{resident};
        } else {
            my $diff = $mem->{resident} - $initial_memory_usage;
            if ($diff > 5*1024*1024) {
                syslog ('info', "restarting server after $cycle cycles to " .
                        "reduce memory usage (free $mem->{resident} ($diff) bytes)");
                restart_server ();
            }
        }

        my $wcount = 0;
        while ((time() < $next_update) &&
               ($wcount < $updatetime) && # protect against time wrap
               !$reload_config) { $wcount++; sleep (1); };
    };

    my $err = $@;

    if ($err) {
        syslog ('err', "ERROR: $err");
        restart_server(5);
        exit (0);
    }
}

exit (0);

__END__

=head1 NAME

pvestatd - PVE Status Daemon

=head1 SYNOPSIS

pvestatd [--debug]

=head1 DESCRIPTION

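pvestatd gathers node, qemu VM, OpenVZ container and storage status on the
local node roughly every 10 seconds and broadcasts it to the cluster as RRD
status data. It also publishes the list of active worker tasks and removes
stale 'vzctl console' processes of containers that no longer exist.
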
Documentation is available at www.proxmox.com