]> git.proxmox.com Git - pve-manager.git/blame - PVE/Service/pvestatd.pm
add missing file
[pve-manager.git] / PVE / Service / pvestatd.pm
CommitLineData
efd04666
DM
1package PVE::Service::pvestatd;
2
3use strict;
4use warnings;
5
6use PVE::SafeSyslog;
7use PVE::Daemon;
8
9use Time::HiRes qw (gettimeofday);
10use PVE::Tools qw(dir_glob_foreach file_read_firstline);
11use PVE::ProcFSTools;
12use Filesys::Df;
13use PVE::INotify;
14use PVE::Cluster qw(cfs_read_file);
15use PVE::Storage;
16use PVE::QemuServer;
17use PVE::LXC;
18use PVE::RPCEnvironment;
19use PVE::API2::Subscription;
20use PVE::AutoBalloon;
21
22use PVE::Status::Plugin;
23use PVE::Status::Graphite;
24
25PVE::Status::Graphite->register();
26PVE::Status::Plugin->init();
27
28use base qw(PVE::Daemon);
29
# File-level daemon state.
#   $opt_debug       - copied from $self->{debug} in init(); gates the debug
#                      output of auto_balloning()
#   $restart_request - set by hup(); polled by run() to restart the daemon
my ($opt_debug, $restart_request);

# Name of the local node, used as part of the RRD keys broadcast below.
my $nodename = PVE::INotify::nodename();

# Program name plus arguments, handed to the daemon constructor.
my $cmdline = [ $0, @ARGV ];

my %daemon_options = (
    restart_on_error => 5,
    stop_wait_time   => 5,
);

my $daemon = __PACKAGE__->new('pvestatd', $cmdline, %daemon_options);
39
# Daemon init hook (presumably invoked by PVE::Daemon before run() - confirm
# against PVE::Daemon): remember the debug flag of the daemon object and
# refresh the cluster file system state.
sub init {
    my $self = shift;

    $opt_debug = $self->{debug};

    PVE::Cluster::cfs_update();
}
47
# Shutdown hook: log the shutdown, reap every child that has already exited
# (without blocking), then leave the daemon with exit status 0.
sub shutdown {
    my ($self) = @_;

    # POSIX::WNOHANG() is called fully qualified below, but this file never
    # loads POSIX itself. Load it explicitly so the call does not depend on
    # some other module having pulled it in first.
    require POSIX;

    syslog('info' , "server closing");

    # wait for children
    1 while (waitpid(-1, POSIX::WNOHANG()) > 0);

    $self->exit_daemon(0);
}
58
# HUP hook: only records that a restart was requested; run() polls the flag
# and performs the actual restart.
sub hup {
    my $self = shift;

    $restart_request = 1;
}
64
# Collect host statistics (load, CPU, memory, root file system, network),
# broadcast them as one colon-separated RRD record under
# "pve2-node/<nodename>" and pass the raw data to every enabled status plugin.
#
# $status_cfg - status plugin configuration; only its {ids} hash is read here.
sub update_node_status {
    my ($status_cfg) = @_;

    my ($avg1, $avg5, $avg15) = PVE::ProcFSTools::read_loadavg();
    my $stat = PVE::ProcFSTools::read_proc_stat();
    my $netdev = PVE::ProcFSTools::read_proc_net_dev();
    my ($uptime) = PVE::ProcFSTools::read_proc_uptime();

    my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();
    my $maxcpu = $cpuinfo->{cpus};

    my $subinfo = PVE::INotify::read_file('subscription');
    my $sublevel = $subinfo->{level} || '';

    # traffic from/to physical interface cards
    # (only interfaces named eth<N> are counted)
    my ($netin, $netout) = (0, 0);
    foreach my $dev (grep { m/^eth\d+$/ } keys %$netdev) {
        $netin += $netdev->{$dev}->{receive};
        $netout += $netdev->{$dev}->{transmit};
    }

    my $meminfo = PVE::ProcFSTools::read_meminfo();

    my $dinfo = df('/', 1); # output is bytes

    my $ctime = time();

    # everything not free is considered to be used
    my $dused = $dinfo->{blocks} - $dinfo->{bfree};

    my $data = join(':',
        $uptime, $sublevel, $ctime, $avg1, $maxcpu,
        $stat->{cpu}, $stat->{wait},
        $meminfo->{memtotal}, $meminfo->{memused},
        $meminfo->{swaptotal}, $meminfo->{swapused},
        $dinfo->{blocks}, $dused, $netin, $netout,
    );

    PVE::Cluster::broadcast_rrd("pve2-node/$nodename", $data);

    foreach my $plugin_config (values %{$status_cfg->{ids}}) {
        next if $plugin_config->{disable};

        my $plugin = PVE::Status::Plugin->lookup($plugin_config->{type});

        my $d = {
            uptime    => $uptime,
            cpustat   => $stat,
            memory    => $meminfo,
            blockstat => $dinfo,
            nics      => $netdev,
        };

        # cpustat shares the hash returned by read_proc_stat(), so the load
        # averages and the CPU count are added to that hash in place.
        @{$d->{cpustat}}{qw(avg1 avg5 avg15 cpus)} = ($avg1, $avg5, $avg15, $maxcpu);

        $plugin->update_node_status($plugin_config, $nodename, $d, $ctime);
    }
}
127
# Adjust the balloon target of VMs so that the host uses about 80% of its
# memory. The per-VM targets come from PVE::AutoBalloon::compute_alg1; a
# failure to set one VM's balloon is only warned about, the remaining VMs
# are still processed.
#
# $vmstatus - hash of VM status entries keyed by vmid; the {balloon} value of
#             each entry is read here.
sub auto_balloning {
    my ($vmstatus) = @_;

    # debug output, only active when the daemon runs in debug mode
    my $log = sub {
        print @_ if $opt_debug;
    };

    my $hostmeminfo = PVE::ProcFSTools::read_meminfo();

    # to debug, run 'pvestatd -d' and set memtotal here
    #$hostmeminfo->{memtotal} = int(2*1024*1024*1024/0.8); # you can set this to test

    # only used for the debug message below
    my $hostfreemem = $hostmeminfo->{memtotal} - $hostmeminfo->{memused};

    # we try to use about 80% host memory
    # goal: we want to change memory usage by this amount (positive or negative)
    my $goal = int($hostmeminfo->{memtotal}*0.8 - $hostmeminfo->{memused});

    # upper bound handed to compute_alg1 (100 MiB)
    my $maxchange = 100*1024*1024;
    my $res = PVE::AutoBalloon::compute_alg1($vmstatus, $goal, $maxchange);

    $log->("host goal: $goal free: $hostfreemem total: $hostmeminfo->{memtotal}\n");

    foreach my $vmid (keys %$vmstatus) {
        # no (or zero) target computed for this VM
        my $target = $res->{$vmid} or next;

        my $diff = int($target - $vmstatus->{$vmid}->{balloon});

        # nothing to do when the target equals the current balloon value
        next unless abs($diff) > 0;

        $log->("BALLOON $vmid to $target ($diff)\n");
        eval {
            PVE::QemuServer::vm_mon_cmd($vmid, "balloon", value => int($target));
        };
        warn $@ if $@;
    }
}
167
# Query the status of all QEMU VMs, run auto ballooning, then broadcast one
# colon-separated RRD record per VM under "pve2.3-vm/<vmid>" and pass the
# status entry to every enabled status plugin.
#
# $status_cfg - status plugin configuration; only its {ids} hash is read here.
sub update_qemu_status {
    my ($status_cfg) = @_;

    my $ctime = time();

    my $vmstatus = PVE::QemuServer::vmstatus(undef, 1);

    # ballooning problems are logged but must not prevent the status update
    eval { auto_balloning($vmstatus); };
    syslog('err', "auto ballooning error: $@") if $@;

    foreach my $vmid (keys %$vmstatus) {
        my $d = $vmstatus->{$vmid};

        my $status = $d->{qmpstatus} || $d->{status} || 'stopped';
        my $template = $d->{template} || "0";

        # Both variants produce the same 15 fields; a VM without a pid
        # reports uptime 0 and leaves the runtime counters empty.
        my @fields;
        if ($d->{pid}) { # running
            @fields = (
                @{$d}{qw(uptime name)}, $status, $template, $ctime,
                @{$d}{qw(cpus cpu maxmem mem maxdisk disk)},
                @{$d}{qw(netin netout diskread diskwrite)},
            );
        } else {
            @fields = (
                0, $d->{name}, $status, $template, $ctime,
                $d->{cpus}, '', $d->{maxmem}, '',
                @{$d}{qw(maxdisk disk)},
                ('') x 4,
            );
        }
        PVE::Cluster::broadcast_rrd("pve2.3-vm/$vmid", join(':', @fields));

        foreach my $plugin_config (values %{$status_cfg->{ids}}) {
            next if $plugin_config->{disable};
            my $plugin = PVE::Status::Plugin->lookup($plugin_config->{type});
            $plugin->update_qemu_status($plugin_config, $vmid, $d, $ctime);
        }
    }
}
206
# Kill (signal 9) every lxc-console process that belongs to a container id
# which no longer shows up in the LXC status list.
sub remove_stale_lxc_consoles {

    my $vmstatus = PVE::LXC::vmstatus();
    my $pidhash = PVE::LXC::find_lxc_console_pids();

    # console pids are grouped by vmid; ids without a status entry are stale
    my @stale = grep { !defined($vmstatus->{$_}) } keys %$pidhash;

    foreach my $vmid (@stale) {
        syslog('info', "remove stale lxc-console for CT $vmid");
        kill(9, $_) for @{$pidhash->{$vmid}};
    }
}
220
# Query the status of all LXC containers, broadcast one colon-separated RRD
# record per container under "pve2.3-vm/<vmid>" and pass the status entry to
# every enabled status plugin.
#
# $status_cfg - status plugin configuration; only its {ids} hash is read here.
sub update_lxc_status {
    my ($status_cfg) = @_;

    my $ctime = time();

    my $vmstatus = PVE::LXC::vmstatus();

    foreach my $vmid (keys %$vmstatus) {
        my $d = $vmstatus->{$vmid};

        my $template = $d->{template} || "0";

        # Both variants produce the same 15 fields; a container that is not
        # running reports uptime 0 and leaves the runtime counters empty.
        my @fields;
        if ($d->{status} eq 'running') { # running
            @fields = (
                @{$d}{qw(uptime name status)}, $template, $ctime,
                @{$d}{qw(cpus cpu maxmem mem maxdisk disk)},
                @{$d}{qw(netin netout diskread diskwrite)},
            );
        } else {
            @fields = (
                0, @{$d}{qw(name status)}, $template, $ctime,
                $d->{cpus}, '', $d->{maxmem}, '',
                @{$d}{qw(maxdisk disk)},
                ('') x 4,
            );
        }
        PVE::Cluster::broadcast_rrd("pve2.3-vm/$vmid", join(':', @fields));

        foreach my $plugin_config (values %{$status_cfg->{ids}}) {
            next if $plugin_config->{disable};
            my $plugin = PVE::Status::Plugin->lookup($plugin_config->{type});
            $plugin->update_lxc_status($plugin_config, $vmid, $d, $ctime);
        }
    }
}
255
# Broadcast "<ctime>:<total>:<used>" for every active storage under
# "pve2-storage/<nodename>/<storeid>" and pass the storage info entry to
# every enabled status plugin. Inactive storages are skipped entirely.
#
# $status_cfg - status plugin configuration; only its {ids} hash is read here.
sub update_storage_status {
    my ($status_cfg) = @_;

    my $cfg = PVE::Cluster::cfs_read_file("storage.cfg");

    my $ctime = time();

    my $info = PVE::Storage::storage_info($cfg);

    foreach my $storeid (keys %$info) {
        my $d = $info->{$storeid};
        next unless $d->{active};

        my $key = join('/', 'pve2-storage', $nodename, $storeid);
        my $data = join(':', $ctime, $d->{total}, $d->{used});
        PVE::Cluster::broadcast_rrd($key, $data);

        foreach my $plugin_config (values %{$status_cfg->{ids}}) {
            next if $plugin_config->{disable};
            my $plugin = PVE::Status::Plugin->lookup($plugin_config->{type});
            $plugin->update_storage_status($plugin_config, $nodename, $storeid, $d, $ctime);
        }
    }
}
282
# Run one complete status update cycle: broadcast the task list, then update
# node, QEMU, LXC and storage status and finally clean up stale lxc consoles.
# Every step runs in its own eval, so a failing step is logged to syslog and
# the remaining steps still run. Reading 'status.cfg' is not guarded and
# propagates its error to the caller.
sub update_status {

    # update worker list. This is not really required and
    # we just call this to make sure that we have a correct
    # list in case of an unexpected crash.
    eval {
        my $tlist = PVE::RPCEnvironment::active_workers();
        PVE::Cluster::broadcast_tasklist($tlist);
    };
    my $err = $@;
    syslog('err', $err) if $err;

    my $status_cfg = PVE::Cluster::cfs_read_file('status.cfg');

    # [ syslog prefix, code ] pairs, executed in this order
    my @steps = (
        [ "node status update error: ",    sub { update_node_status($status_cfg) } ],
        [ "qemu status update error: ",    sub { update_qemu_status($status_cfg) } ],
        [ "lxc status update error: ",     sub { update_lxc_status($status_cfg) } ],
        [ "storage status update error: ", sub { update_storage_status($status_cfg) } ],
        [ "lxc console cleanup error: ",   sub { remove_stale_lxc_consoles() } ],
    );

    foreach my $step (@steps) {
        my ($prefix, $code) = @$step;

        eval {
            $code->();
        };
        $err = $@;
        syslog('err', $prefix . $err) if $err;
    }
}
327
# Time (epoch seconds) at which the next update cycle is due.
my $next_update = 0;

# do not update directly after startup, because install scripts
# have a problem with that
my $cycle = 0;

# Seconds between two update cycles.
my $updatetime = 10;

# Resident memory baseline; refreshed during the first cycles, see run().
my $initial_memory_usage;

# Main loop of the daemon. Each pass:
#   - runs a status update (skipped on the very first pass) and logs updates
#     that took longer than 5 seconds,
#   - restarts the daemon when resident memory grew by more than 5 MiB over
#     the baseline recorded while $cycle was below 10,
#   - sleeps in one second steps until the next update is due or a restart
#     was requested via hup().
sub run {
    my ($self) = @_;

    while (1) { # forever

        $next_update = time() + $updatetime;

        if ($cycle) {
            my @started = gettimeofday();

            eval {
                # syslog('info', "start status update");
                PVE::Cluster::cfs_update();
                update_status();
            };
            if (my $err = $@) {
                syslog('err', "status update error: $err");
            }

            my @finished = gettimeofday();

            # elapsed time in seconds: whole seconds plus microsecond part
            my $cptime = ($finished[0] - $started[0]) +
                ($finished[1] - $started[1])/1000000;

            if ($cptime > 5) {
                syslog('info', sprintf("status update time (%.3f seconds)", $cptime));
            }
        }

        $cycle++;

        my $mem = PVE::ProcFSTools::read_memory_usage();

        if (defined($initial_memory_usage) && ($cycle >= 10)) {
            my $diff = $mem->{resident} - $initial_memory_usage;
            if ($diff > 5*1024*1024) {
                syslog ('info', "restarting server after $cycle cycles to " .
                        "reduce memory usage (free $mem->{resident} ($diff) bytes)");
                $self->restart_daemon();
            }
        } else {
            # still warming up: keep tracking the baseline
            $initial_memory_usage = $mem->{resident};
        }

        # Wait for the next cycle. The iteration bound protects against
        # time wrap: never sleep more than $updatetime times.
        for (my $wcount = 0; $wcount < $updatetime; $wcount++) {
            last if time() >= $next_update;
            last if $restart_request;
            sleep (1);
        }

        $self->restart_daemon() if $restart_request;
    }
}
387
# Register the start/restart/stop/status commands on the daemon object.
$daemon->register_start_command();
$daemon->register_restart_command(1);
$daemon->register_stop_command();
$daemon->register_status_command();

# Command table: command name => [ package, method name, argument list, ... ].
# The 'status' entry additionally carries a callback that prints its first
# argument followed by a newline.
our $cmddef = {
    start   => [ __PACKAGE__, 'start', [] ],
    restart => [ __PACKAGE__, 'restart', [] ],
    stop    => [ __PACKAGE__, 'stop', [] ],
    status  => [
        __PACKAGE__, 'status', [], undef,
        sub {
            my ($res) = @_;
            print "$res\n";
        },
    ],
};
399
400#my $cmd = shift;
401#PVE::CLIHandler::handle_cmd($cmddef, $0, $cmd, \@ARGV, undef, $0);
402#exit (0);
403
4041;
405
406__END__
407
408=head1 NAME
409
410pvestatd - PVE Status Daemon
411
412=head1 SYNOPSIS
413
414=include synopsis
415
416=head1 DESCRIPTION
417
This daemon queries the status of VMs, storages and containers at
regular intervals. The result is sent to all nodes in the cluster.
420
421=include pve_copyright
422
423
424
425
426