1 package PVE
::Service
::pvestatd
;
9 use Time
::HiRes qw
(gettimeofday
);
10 use PVE
::Tools
qw(dir_glob_foreach file_read_firstline);
14 use PVE
::Cluster
qw(cfs_read_file);
18 use PVE
::RPCEnvironment
;
19 use PVE
::API2
::Subscription
;
22 use PVE
::Status
::Plugin
;
23 use PVE
::Status
::Graphite
;
24 use PVE
::Status
::InfluxDB
;
26 PVE
::Status
::Graphite-
>register();
27 PVE
::Status
::InfluxDB-
>register();
28 PVE
::Status
::Plugin-
>init();
30 use base
qw(PVE::Daemon);
35 my $nodename = PVE
::INotify
::nodename
();
37 my $cmdline = [$0, @ARGV];
39 my %daemon_options = (restart_on_error
=> 5, stop_wait_time
=> 5);
40 my $daemon = __PACKAGE__-
>new('pvestatd', $cmdline, %daemon_options);
45 $opt_debug = $self->{debug
};
47 PVE
::Cluster
::cfs_update
();
53 syslog
('info' , "server closing");
56 1 while (waitpid(-1, POSIX
::WNOHANG
()) > 0);
58 $self->exit_daemon(0);
67 sub update_node_status
{
68 my ($status_cfg) = @_;
70 my ($avg1, $avg5, $avg15) = PVE
::ProcFSTools
::read_loadavg
();
72 my $stat = PVE
::ProcFSTools
::read_proc_stat
();
74 my $netdev = PVE
::ProcFSTools
::read_proc_net_dev
();
76 my ($uptime) = PVE
::ProcFSTools
::read_proc_uptime
();
78 my $cpuinfo = PVE
::ProcFSTools
::read_cpuinfo
();
80 my $maxcpu = $cpuinfo->{cpus
};
82 my $subinfo = PVE
::INotify
::read_file
('subscription');
83 my $sublevel = $subinfo->{level
} || '';
85 # traffic from/to physical interface cards
88 foreach my $dev (keys %$netdev) {
89 next if $dev !~ m/^eth\d+$/;
90 $netin += $netdev->{$dev}->{receive
};
91 $netout += $netdev->{$dev}->{transmit
};
94 my $meminfo = PVE
::ProcFSTools
::read_meminfo
();
96 my $dinfo = df
('/', 1); # output is bytes
100 # everything not free is considered to be used
101 my $dused = $dinfo->{blocks
} - $dinfo->{bfree
};
103 my $data = "$uptime:$sublevel:$ctime:$avg1:$maxcpu:$stat->{cpu}:$stat->{wait}:" .
104 "$meminfo->{memtotal}:$meminfo->{memused}:" .
105 "$meminfo->{swaptotal}:$meminfo->{swapused}:" .
106 "$dinfo->{blocks}:$dused:$netin:$netout";
108 PVE
::Cluster
::broadcast_rrd
("pve2-node/$nodename", $data);
110 foreach my $id (keys %{$status_cfg->{ids
}}) {
111 my $plugin_config = $status_cfg->{ids
}->{$id};
112 next if $plugin_config->{disable
};
113 my $plugin = PVE
::Status
::Plugin-
>lookup($plugin_config->{type
});
116 $d->{uptime
} = $uptime;
117 $d->{cpustat
} = $stat;
118 $d->{cpustat
}->{avg1
} = $avg1;
119 $d->{cpustat
}->{avg5
} = $avg5;
120 $d->{cpustat
}->{avg15
} = $avg15;
121 $d->{cpustat
}->{cpus
} = $maxcpu;
122 $d->{memory
} = $meminfo;
123 $d->{blockstat
} = $dinfo;
124 $d->{nics
} = $netdev;
126 $plugin->update_node_status($plugin_config, $nodename, $d, $ctime);
134 return if !$opt_debug;
138 my $hostmeminfo = PVE
::ProcFSTools
::read_meminfo
();
140 # to debug, run 'pvestatd -d' and set memtotal here
141 #$hostmeminfo->{memtotal} = int(2*1024*1024*1024/0.8); # you can set this to test
143 my $hostfreemem = $hostmeminfo->{memtotal
} - $hostmeminfo->{memused
};
145 # we try to use about 80% host memory
146 # goal: we want to change memory usage by this amount (positive or negative)
147 my $goal = int($hostmeminfo->{memtotal
}*0.8 - $hostmeminfo->{memused
});
149 my $maxchange = 100*1024*1024;
150 my $res = PVE
::AutoBalloon
::compute_alg1
($vmstatus, $goal, $maxchange);
152 &$log("host goal: $goal free: $hostfreemem total: $hostmeminfo->{memtotal}\n");
154 foreach my $vmid (keys %$vmstatus) {
155 next if !$res->{$vmid};
156 my $d = $vmstatus->{$vmid};
157 my $diff = int($res->{$vmid} - $d->{balloon
});
158 my $absdiff = $diff < 0 ?
-$diff : $diff;
160 &$log("BALLOON $vmid to $res->{$vmid} ($diff)\n");
162 PVE
::QemuServer
::vm_mon_cmd
($vmid, "balloon",
163 value
=> int($res->{$vmid}));
170 sub update_qemu_status
{
171 my ($status_cfg) = @_;
175 my $vmstatus = PVE
::QemuServer
::vmstatus
(undef, 1);
177 eval { auto_balloning
($vmstatus); };
178 syslog
('err', "auto ballooning error: $@") if $@;
180 foreach my $vmid (keys %$vmstatus) {
181 my $d = $vmstatus->{$vmid};
183 my $status = $d->{qmpstatus
} || $d->{status
} || 'stopped';
184 my $template = $d->{template
} ?
$d->{template
} : "0";
185 if ($d->{pid
}) { # running
186 $data = "$d->{uptime}:$d->{name}:$status:$template:" .
187 "$ctime:$d->{cpus}:$d->{cpu}:" .
188 "$d->{maxmem}:$d->{mem}:" .
189 "$d->{maxdisk}:$d->{disk}:" .
190 "$d->{netin}:$d->{netout}:" .
191 "$d->{diskread}:$d->{diskwrite}";
193 $data = "0:$d->{name}:$status:$template:$ctime:$d->{cpus}::" .
195 "$d->{maxdisk}:$d->{disk}:" .
198 PVE
::Cluster
::broadcast_rrd
("pve2.3-vm/$vmid", $data);
200 foreach my $id (keys %{$status_cfg->{ids
}}) {
201 my $plugin_config = $status_cfg->{ids
}->{$id};
202 next if $plugin_config->{disable
};
203 my $plugin = PVE
::Status
::Plugin-
>lookup($plugin_config->{type
});
204 $plugin->update_qemu_status($plugin_config, $vmid, $d, $ctime);
209 sub remove_stale_lxc_consoles
{
211 my $vmstatus = PVE
::LXC
::vmstatus
();
212 my $pidhash = PVE
::LXC
::find_lxc_console_pids
();
214 foreach my $vmid (keys %$pidhash) {
215 next if defined($vmstatus->{$vmid});
216 syslog
('info', "remove stale lxc-console for CT $vmid");
217 foreach my $pid (@{$pidhash->{$vmid}}) {
223 sub update_lxc_status
{
224 my ($status_cfg) = @_;
228 my $vmstatus = PVE
::LXC
::vmstatus
();
230 foreach my $vmid (keys %$vmstatus) {
231 my $d = $vmstatus->{$vmid};
232 my $template = $d->{template
} ?
$d->{template
} : "0";
234 if ($d->{status
} eq 'running') { # running
235 $data = "$d->{uptime}:$d->{name}:$d->{status}:$template:" .
236 "$ctime:$d->{cpus}:$d->{cpu}:" .
237 "$d->{maxmem}:$d->{mem}:" .
238 "$d->{maxdisk}:$d->{disk}:" .
239 "$d->{netin}:$d->{netout}:" .
240 "$d->{diskread}:$d->{diskwrite}";
242 $data = "0:$d->{name}:$d->{status}:$template:$ctime:$d->{cpus}::" .
244 "$d->{maxdisk}:$d->{disk}:" .
247 PVE
::Cluster
::broadcast_rrd
("pve2.3-vm/$vmid", $data);
249 foreach my $id (keys %{$status_cfg->{ids
}}) {
250 my $plugin_config = $status_cfg->{ids
}->{$id};
251 next if $plugin_config->{disable
};
252 my $plugin = PVE
::Status
::Plugin-
>lookup($plugin_config->{type
});
253 $plugin->update_lxc_status($plugin_config, $vmid, $d, $ctime);
258 sub update_storage_status
{
259 my ($status_cfg) = @_;
261 my $cfg = cfs_read_file
("storage.cfg");
265 my $info = PVE
::Storage
::storage_info
($cfg);
267 foreach my $storeid (keys %$info) {
268 my $d = $info->{$storeid};
269 next if !$d->{active
};
271 my $data = "$ctime:$d->{total}:$d->{used}";
273 my $key = "pve2-storage/${nodename}/$storeid";
274 PVE
::Cluster
::broadcast_rrd
($key, $data);
276 foreach my $id (keys %{$status_cfg->{ids
}}) {
277 my $plugin_config = $status_cfg->{ids
}->{$id};
278 next if $plugin_config->{disable
};
279 my $plugin = PVE
::Status
::Plugin-
>lookup($plugin_config->{type
});
280 $plugin->update_storage_status($plugin_config, $nodename, $storeid, $d, $ctime);
287 # update worker list. This is not really required and
288 # we just call this to make sure that we have a correct
289 # list in case of an unexpected crash.
291 my $tlist = PVE
::RPCEnvironment
::active_workers
();
292 PVE
::Cluster
::broadcast_tasklist
($tlist);
295 syslog
('err', $err) if $err;
297 my $status_cfg = PVE
::Cluster
::cfs_read_file
('status.cfg');
300 update_node_status
($status_cfg);
303 syslog
('err', "node status update error: $err") if $err;
306 update_qemu_status
($status_cfg);
309 syslog
('err', "qemu status update error: $err") if $err;
312 update_lxc_status
($status_cfg);
315 syslog
('err', "lxc status update error: $err") if $err;
318 update_storage_status
($status_cfg);
321 syslog
('err', "storage status update error: $err") if $err;
324 remove_stale_lxc_consoles
();
327 syslog
('err', "lxc console cleanup error: $err") if $err;
332 # do not update directly after startup, because install scripts
333 # have a problem with that
337 my $initial_memory_usage;
344 $next_update = time() + $updatetime;
347 my ($ccsec, $cusec) = gettimeofday
();
349 # syslog('info', "start status update");
350 PVE
::Cluster
::cfs_update
();
356 syslog
('err', "status update error: $err");
359 my ($ccsec_end, $cusec_end) = gettimeofday
();
360 my $cptime = ($ccsec_end-$ccsec) + ($cusec_end - $cusec)/1000000;
362 syslog
('info', sprintf("status update time (%.3f seconds)", $cptime))
368 my $mem = PVE
::ProcFSTools
::read_memory_usage
();
370 if (!defined($initial_memory_usage) || ($cycle < 10)) {
371 $initial_memory_usage = $mem->{resident
};
373 my $diff = $mem->{resident
} - $initial_memory_usage;
374 if ($diff > 5*1024*1024) {
375 syslog
('info', "restarting server after $cycle cycles to " .
376 "reduce memory usage (free $mem->{resident} ($diff) bytes)");
377 $self->restart_daemon();
382 while ((time() < $next_update) &&
383 ($wcount < $updatetime) && # protect against time wrap
384 !$restart_request) { $wcount++; sleep (1); };
386 $self->restart_daemon() if $restart_request;
390 $daemon->register_start_command();
391 $daemon->register_restart_command(1);
392 $daemon->register_stop_command();
393 $daemon->register_status_command();
396 start
=> [ __PACKAGE__
, 'start', []],
397 restart
=> [ __PACKAGE__
, 'restart', []],
398 stop
=> [ __PACKAGE__
, 'stop', []],
399 status
=> [ __PACKAGE__
, 'status', [], undef, sub { print shift . "\n";} ],
403 #PVE::CLIHandler::handle_cmd($cmddef, $0, $cmd, \@ARGV, undef, $0);
412 pvestatd - PVE Status Daemon
420 This daemon queries the status of VMs, storages and containers at
421 regular intervals. The result is sent to all nodes in the cluster.
423 =include pve_copyright