]> git.proxmox.com Git - pve-manager.git/blob - bin/pvestatd
pvestatd: fix register_XYZ_command parameters
[pve-manager.git] / bin / pvestatd
#!/usr/bin/perl

use strict;
use warnings;

use POSIX ();
use Time::HiRes qw(gettimeofday);

use PVE::SafeSyslog;
use PVE::Daemon;
use PVE::Tools qw(dir_glob_foreach file_read_firstline);
use PVE::ProcFSTools;
use Filesys::Df;
use PVE::INotify;
use PVE::Cluster qw(cfs_read_file);
use PVE::Storage;
use PVE::QemuServer;
use PVE::OpenVZ;
use PVE::RPCEnvironment;
use PVE::API2::Subscription;
use PVE::AutoBalloon;

use base qw(PVE::Daemon);
22
# File-scoped state shared by the subs below.
my $opt_debug;           # set in init() from the daemon's -d flag
my $restart_request = 0; # set by hup(), checked in run()

my $cmdline = [$0, @ARGV];

my %daemon_opts = (
    restart_on_error => 5,
    stop_wait_time => 5,
);

my $daemon = __PACKAGE__->new('pvestatd', $cmdline, %daemon_opts);

# run as root with a CLI RPC environment
my $rpcenv = PVE::RPCEnvironment->init('cli');

$rpcenv->init_request();
$rpcenv->set_language($ENV{LANG});
$rpcenv->set_user('root@pam');

my $nodename = PVE::INotify::nodename();
39
# Daemon startup hook: capture the debug flag and prime the cluster
# filesystem cache before the first status cycle.
sub init {
    my ($self) = @_;

    $opt_debug = $self->{debug};

    PVE::Cluster::cfs_update();
}
47
# Daemon shutdown hook: reap any remaining children, then exit cleanly.
sub shutdown {
    my ($self) = @_;

    syslog('info', "server closing");

    # non-blocking reap of all exited children
    1 while waitpid(-1, POSIX::WNOHANG()) > 0;

    $self->exit_daemon(0);
}
58
# SIGHUP handler: just flag a restart; run() performs it at the end
# of the current cycle so a status update is never cut short.
sub hup {
    my ($self) = @_;

    syslog('info', "received signal HUP");

    $restart_request = 1;
}
66
# Collect node-level statistics (load, CPU, memory, root fs, NIC traffic,
# subscription level) and broadcast them as one RRD record to the cluster.
sub update_node_status {

    my ($avg1, $avg5, $avg15) = PVE::ProcFSTools::read_loadavg();
    my $stat = PVE::ProcFSTools::read_proc_stat();
    my $netdev = PVE::ProcFSTools::read_proc_net_dev();
    my ($uptime) = PVE::ProcFSTools::read_proc_uptime();
    my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();
    my $maxcpu = $cpuinfo->{cpus};

    my $subinfo = PVE::INotify::read_file('subscription');
    my $sublevel = $subinfo->{level} || '';

    # sum traffic over physical interface cards only (ethX)
    my ($netin, $netout) = (0, 0);
    for my $dev (grep { /^eth\d+$/ } keys %$netdev) {
        $netin += $netdev->{$dev}->{receive};
        $netout += $netdev->{$dev}->{transmit};
    }

    my $meminfo = PVE::ProcFSTools::read_meminfo();

    my $dinfo = df('/', 1); # output is bytes

    my $ctime = time();

    # everything not free is considered to be used
    my $dused = $dinfo->{blocks} - $dinfo->{bfree};

    # field order is the fixed pve2-node RRD schema - do not reorder
    my $data = join(':',
        $uptime, $sublevel, $ctime, $avg1, $maxcpu,
        $stat->{cpu}, $stat->{wait},
        $meminfo->{memtotal}, $meminfo->{memused},
        $meminfo->{swaptotal}, $meminfo->{swapused},
        $dinfo->{blocks}, $dused, $netin, $netout);

    PVE::Cluster::broadcast_rrd("pve2-node/$nodename", $data);
}
109
# Automatic memory ballooning: aim for ~80% host memory usage and let
# PVE::AutoBalloon distribute the difference across the running VMs.
# Takes the vmstatus hash from PVE::QemuServer::vmstatus().
# NOTE: the sub name keeps its historic typo ("balloning") because
# callers in this file use it.
sub auto_balloning {
    my ($vmstatus) = @_;

    # debug-only logger; silent unless pvestatd runs with -d
    my $log = sub {
        return if !$opt_debug;
        print @_;
    };

    my $hostmeminfo = PVE::ProcFSTools::read_meminfo();

    # to debug, run 'pvestatd -d' and set memtotal here
    #$hostmeminfo->{memtotal} = int(2*1024*1024*1024/0.8); # you can set this to test

    my $hostfreemem = $hostmeminfo->{memtotal} - $hostmeminfo->{memused};

    # we try to use about 80% host memory
    # goal: we want to change memory usage by this amount (positive or negative)
    my $goal = int($hostmeminfo->{memtotal}*0.8 - $hostmeminfo->{memused});

    # limit a single balloon step to 100 MiB per VM
    my $maxchange = 100*1024*1024;
    my $res = PVE::AutoBalloon::compute_alg1($vmstatus, $goal, $maxchange);

    &$log("host goal: $goal free: $hostfreemem total: $hostmeminfo->{memtotal}\n");

    foreach my $vmid (keys %$vmstatus) {
        next if !$res->{$vmid};
        my $d = $vmstatus->{$vmid};
        my $diff = int($res->{$vmid} - $d->{balloon});
        # was: $absdiff = $diff < 0 ? -$diff : $diff; if ($absdiff > 0) -
        # that is just a non-zero test, so say so directly
        if ($diff != 0) {
            &$log("BALLOON $vmid to $res->{$vmid} ($diff)\n");
            eval {
                PVE::QemuServer::vm_mon_cmd($vmid, "balloon",
                                            value => int($res->{$vmid}));
            };
            warn $@ if $@; # best effort - a single VM failure must not abort the pass
        }
    }
}
149
# Query all qemu guests, run the auto-balloon pass, then broadcast one
# pve2.3-vm RRD record per VM (running and stopped use different layouts).
sub update_qemu_status {

    my $ctime = time();

    my $vmstatus = PVE::QemuServer::vmstatus(undef, 1);

    # ballooning is best effort - never let it break the status loop
    eval { auto_balloning($vmstatus); };
    syslog('err', "auto ballooning error: $@") if $@;

    foreach my $vmid (keys %$vmstatus) {
        my $d = $vmstatus->{$vmid};
        my $status = $d->{qmpstatus} || $d->{status} || 'stopped';
        my $template = $d->{template} ? $d->{template} : "0";

        my $data;
        if ($d->{pid}) { # running
            $data = "$d->{uptime}:$d->{name}:$status:$template:" .
                "$ctime:$d->{cpus}:$d->{cpu}:" .
                "$d->{maxmem}:$d->{mem}:" .
                "$d->{maxdisk}:$d->{disk}:" .
                "$d->{netin}:$d->{netout}:" .
                "$d->{diskread}:$d->{diskwrite}";
        } else {
            # stopped guests report uptime 0 and empty runtime counters
            $data = "0:$d->{name}:$status:$template:$ctime:$d->{cpus}::" .
                "$d->{maxmem}::" .
                "$d->{maxdisk}:$d->{disk}:" .
                ":::";
        }
        PVE::Cluster::broadcast_rrd("pve2.3-vm/$vmid", $data);
    }
}
180
# Scan /proc for processes that look like "vzctl console <vmid>".
# Returns a hashref mapping vmid => [ pids ].
sub find_vzctl_console_pids {

    my $consoles = {};

    dir_glob_foreach('/proc', '\d+', sub {
        my ($pid) = @_;

        my $cmdline = file_read_firstline("/proc/$pid/cmdline");
        return if !$cmdline;

        # /proc cmdline arguments are NUL separated
        my @argv = split(/\0/, $cmdline);

        # accept exactly: [/usr/sbin/]vzctl console <vmid>
        return if scalar(@argv) != 3;
        return if $argv[0] !~ m|^(/usr/sbin/)?vzctl$|;
        return if $argv[1] ne 'console';
        return if $argv[2] !~ m/^\d+$/;

        push @{$consoles->{$argv[2]}}, $pid;
    });

    return $consoles;
}
206
# Kill "vzctl console" processes whose container no longer exists.
sub remove_stale_openvz_consoles {

    my $vmstatus = PVE::OpenVZ::vmstatus();
    my $pidhash = find_vzctl_console_pids();

    foreach my $vmid (keys %$pidhash) {
        next if defined($vmstatus->{$vmid}); # container still known - keep console
        syslog('info', "remove stale vzctl console for CT $vmid");
        # SIGKILL the whole pid list in one call
        kill(9, @{$pidhash->{$vmid}});
    }
}
220
# Broadcast one pve2.3-vm RRD record per OpenVZ container, using the
# same field schema as the qemu guests (template field is always 0).
sub update_openvz_status {

    my $ctime = time();

    my $vmstatus = PVE::OpenVZ::vmstatus();

    foreach my $vmid (keys %$vmstatus) {
        my $d = $vmstatus->{$vmid};

        my $data;
        if ($d->{status} eq 'running') {
            $data = "$d->{uptime}:$d->{name}:$d->{status}:0:$ctime:$d->{cpus}:$d->{cpu}:" .
                "$d->{maxmem}:$d->{mem}:" .
                "$d->{maxdisk}:$d->{disk}:" .
                "$d->{netin}:$d->{netout}:" .
                "$d->{diskread}:$d->{diskwrite}";
        } else {
            # stopped containers report uptime 0 and empty runtime counters
            $data = "0:$d->{name}:$d->{status}:0:$ctime:$d->{cpus}::" .
                "$d->{maxmem}::" .
                "$d->{maxdisk}:$d->{disk}:" .
                ":::";
        }
        PVE::Cluster::broadcast_rrd("pve2.3-vm/$vmid", $data);
    }
}
245
# Broadcast usage statistics for every active storage on this node.
sub update_storage_status {

    my $cfg = cfs_read_file("storage.cfg");
    my $ctime = time();
    my $info = PVE::Storage::storage_info($cfg);

    foreach my $storeid (keys %$info) {
        my $d = $info->{$storeid};
        next if !$d->{active};

        # everything not free is considered to be used
        my $realused = $d->{total} - $d->{avail};

        PVE::Cluster::broadcast_rrd("pve2-storage/${nodename}/$storeid",
                                    "$ctime:$d->{total}:$realused");
    }
}
267
# One full status cycle: each sub-update runs inside its own eval so a
# failure in one area never prevents the others from being broadcast.
sub update_status {

    # update worker list. This is not really required and
    # we just call this to make sure that we have a correct
    # list in case of an unexpected crash.
    eval {
        my $tlist = PVE::RPCEnvironment::active_workers();
        PVE::Cluster::broadcast_tasklist($tlist);
    };
    my $err = $@;
    # was: bare $err with no prefix - made consistent with the branches below
    syslog('err', "tasklist update error: $err") if $err;

    eval {
        update_node_status();
    };
    $err = $@;
    syslog('err', "node status update error: $err") if $err;

    eval {
        update_qemu_status();
    };
    $err = $@;
    syslog('err', "qemu status update error: $err") if $err;

    eval {
        update_openvz_status();
    };
    $err = $@;
    syslog('err', "openvz status update error: $err") if $err;

    eval {
        update_storage_status();
    };
    $err = $@;
    syslog('err', "storage status update error: $err") if $err;

    eval {
        remove_stale_openvz_consoles();
    };
    $err = $@;
    syslog('err', "openvz console cleanup error: $err") if $err;
}
310
my $next_update = 0;

# do not update directly after startup, because install scripts
# have a problem with that
my $cycle = 0;
my $updatetime = 10; # seconds between status cycles

# resident-set baseline used by run() to detect memory growth
my $initial_memory_usage;
319
# Main daemon loop: run update_status() every $updatetime seconds,
# watch our own memory footprint, and restart ourselves on SIGHUP or
# when resident memory grows more than 5 MiB over the early baseline.
sub run {
    my ($self) = @_;

    for (;;) { # forever

        $next_update = time() + $updatetime;

        # skip the very first cycle (see comment at $cycle above)
        if ($cycle) {
            my ($ccsec, $cusec) = gettimeofday();
            eval {
                # syslog('info', "start status update");
                PVE::Cluster::cfs_update();
                update_status();
            };
            my $err = $@;

            syslog('err', "status update error: $err") if $err;

            my ($ccsec_end, $cusec_end) = gettimeofday();
            my $cptime = ($ccsec_end - $ccsec) + ($cusec_end - $cusec)/1000000;

            # warn when a cycle takes unusually long
            syslog('info', sprintf("status update time (%.3f seconds)", $cptime))
                if ($cptime > 5);
        }

        $cycle++;

        my $mem = PVE::ProcFSTools::read_memory_usage();

        # let memory settle for the first 10 cycles before taking a baseline
        if (!defined($initial_memory_usage) || ($cycle < 10)) {
            $initial_memory_usage = $mem->{resident};
        } else {
            my $diff = $mem->{resident} - $initial_memory_usage;
            if ($diff > 5*1024*1024) {
                # message fix: the value is the resident set size, not free memory
                syslog('info', "restarting server after $cycle cycles to " .
                       "reduce memory usage (resident $mem->{resident} ($diff) bytes)");
                $self->restart_daemon();
            }
        }

        # sleep in 1s steps so HUP requests are noticed promptly;
        # $wcount bounds the wait to protect against time warps
        my $wcount = 0;
        while ((time() < $next_update) &&
               ($wcount < $updatetime) &&
               !$restart_request) { $wcount++; sleep(1); };

        $self->restart_daemon() if $restart_request;
    }
}
370
# Register the standard daemon sub-commands; restart(1) keeps the
# listening environment across restarts.
$daemon->register_start_command();
$daemon->register_restart_command(1);
$daemon->register_stop_command();
$daemon->register_status_command();

# CLI dispatch table: command name => [class, method, arg spec, ...]
my $cmddef = {
    start => [ __PACKAGE__, 'start', []],
    restart => [ __PACKAGE__, 'restart', []],
    stop => [ __PACKAGE__, 'stop', []],
    status => [ __PACKAGE__, 'status', [], undef, sub { print shift . "\n";} ],
};

my $cmd = shift;

PVE::CLIHandler::handle_cmd($cmddef, $0, $cmd, \@ARGV, undef, $0);

exit (0);
388
389 __END__
390
391 =head1 NAME
392
393 pvestatd - PVE Status Daemon
394
395 =head1 SYNOPSIS
396
397 =include synopsis
398
399 =head1 DESCRIPTION
400
401 This daemon queries the status of VMs, storages and containers at
402 regular intervals. The result is sent to all nodes in the cluster.
403
404 =include pve_copyright
405
406
407
408
409