1 package PVE
::Service
::pvestatd
;
9 use Time
::HiRes qw
(gettimeofday
);
10 use PVE
::Tools
qw(dir_glob_foreach file_read_firstline);
15 use PVE
::Cluster
qw(cfs_read_file);
20 use PVE
::RPCEnvironment
;
21 use PVE
::API2
::Subscription
;
24 use PVE
::Status
::Plugin
;
25 use PVE
::Status
::Graphite
;
26 use PVE
::Status
::InfluxDB
;
# Top-level statements: register the Graphite and InfluxDB status plugins and
# then initialise the PVE::Status::Plugin registry.
28 PVE
::Status
::Graphite-
>register();
29 PVE
::Status
::InfluxDB-
>register();
30 PVE
::Status
::Plugin-
>init();
# This package inherits from PVE::Daemon.
32 use base
qw(PVE::Daemon);
# Name of the local node; used in the RRD keys and handed to the status plugins.
37 my $nodename = PVE
::INotify
::nodename
();
# Program name plus its arguments, passed on to the daemon constructor.
39 my $cmdline = [$0, @ARGV];
# Options forwarded to the PVE::Daemon constructor.
# NOTE(review): the meaning/units of both values are defined by PVE::Daemon - confirm there.
41 my %daemon_options = (restart_on_error
=> 5, stop_wait_time
=> 5);
# The daemon object for this package, registered under the name 'pvestatd'.
42 my $daemon = __PACKAGE__-
>new('pvestatd', $cmdline, %daemon_options);
# NOTE(review): the sub headers enclosing the statements below are not part of
# this listing; they look like daemon lifecycle hooks (init / shutdown) - confirm.
# Copy the debug flag of the daemon object into $opt_debug.
47 $opt_debug = $self->{debug
};
49 PVE
::Cluster
::cfs_update
();
55 syslog
('info' , "server closing");
# Reap all already-terminated child processes without blocking (WNOHANG).
58 1 while (waitpid(-1, POSIX
::WNOHANG
()) > 0);
60 $self->exit_daemon(0);
# Closure that builds one RRD update string: the elements of the array
# referenced by $data are joined with ':', and every undefined element is
# written as the literal 'U'.
# NOTE(review): the statement that sets $data and the end of the closure are
# not part of this listing.
69 my $generate_rrd_string = sub {
72 return join(':', map { $_ // 'U' } @$data);
# update_node_status($status_cfg)
#
# Collects host-level metrics (load average, CPU statistics, network device
# counters, uptime, CPU info, subscription level, memory and root filesystem
# usage), broadcasts them as one RRD string under "pve2-node/$nodename" and
# hands the same data to every enabled status plugin found in
# $status_cfg->{ids}.
#
# NOTE(review): the declarations of $netin, $netout, $ctime and $d as well as
# the closing braces are not part of this listing.
75 sub update_node_status
{
76 my ($status_cfg) = @_;
78 my ($avg1, $avg5, $avg15) = PVE
::ProcFSTools
::read_loadavg
();
80 my $stat = PVE
::ProcFSTools
::read_proc_stat
();
82 my $netdev = PVE
::ProcFSTools
::read_proc_net_dev
();
84 my ($uptime) = PVE
::ProcFSTools
::read_proc_uptime
();
86 my $cpuinfo = PVE
::ProcFSTools
::read_cpuinfo
();
88 my $maxcpu = $cpuinfo->{cpus
};
90 my $subinfo = PVE
::INotify
::read_file
('subscription');
# Falls back to the empty string when no subscription level is set.
91 my $sublevel = $subinfo->{level
} || '';
93 # traffic from/to physical interface cards
# NOTE(review): only devices named eth<N> match this pattern; interfaces with
# any other naming scheme are not added to $netin/$netout.
96 foreach my $dev (keys %$netdev) {
97 next if $dev !~ m/^eth\d+$/;
98 $netin += $netdev->{$dev}->{receive
};
99 $netout += $netdev->{$dev}->{transmit
};
102 my $meminfo = PVE
::ProcFSTools
::read_meminfo
();
104 my $dinfo = df
('/', 1); # output is bytes
108 # everything not free is considered to be used
109 my $dused = $dinfo->{blocks
} - $dinfo->{bfree
};
# Field order of the node RRD record; undefined values are encoded by
# $generate_rrd_string.
111 my $data = $generate_rrd_string->(
112 [$uptime, $sublevel, $ctime, $avg1, $maxcpu, $stat->{cpu
}, $stat->{wait},
113 $meminfo->{memtotal
}, $meminfo->{memused
},
114 $meminfo->{swaptotal
}, $meminfo->{swapused
},
115 $dinfo->{blocks
}, $dused, $netin, $netout]);
117 PVE
::Cluster
::broadcast_rrd
("pve2-node/$nodename", $data);
# Forward the raw data structures to every status plugin that is not disabled.
119 foreach my $id (keys %{$status_cfg->{ids
}}) {
120 my $plugin_config = $status_cfg->{ids
}->{$id};
121 next if $plugin_config->{disable
};
122 my $plugin = PVE
::Status
::Plugin-
>lookup($plugin_config->{type
});
125 $d->{uptime
} = $uptime;
# $d->{cpustat} is the same hash reference as $stat, so the avg1/avg5/avg15/cpus
# assignments below are stored in that shared hash.
126 $d->{cpustat
} = $stat;
127 $d->{cpustat
}->{avg1
} = $avg1;
128 $d->{cpustat
}->{avg5
} = $avg5;
129 $d->{cpustat
}->{avg15
} = $avg15;
130 $d->{cpustat
}->{cpus
} = $maxcpu;
131 $d->{memory
} = $meminfo;
132 $d->{blockstat
} = $dinfo;
133 $d->{nics
} = $netdev;
135 $plugin->update_node_status($plugin_config, $nodename, $d, $ctime);
# NOTE(review): the sub header for the statements below is not part of this
# listing; this is presumably the body of auto_balloning($vmstatus), which is
# called from update_qemu_status - confirm.
#
# Computes a host-wide memory goal and asks each VM that has a computed target
# to change its balloon size.
# Early return when debugging is off.
# NOTE(review): this presumably belongs to the $log helper used further down - confirm.
143 return if !$opt_debug;
147 my $hostmeminfo = PVE
::ProcFSTools
::read_meminfo
();
149 # to debug, run 'pvestatd -d' and set memtotal here
150 #$hostmeminfo->{memtotal} = int(2*1024*1024*1024/0.8); # you can set this to test
152 my $hostfreemem = $hostmeminfo->{memtotal
} - $hostmeminfo->{memused
};
154 # we try to use about 80% host memory
155 # goal: we want to change memory usage by this amount (positive or negative)
156 my $goal = int($hostmeminfo->{memtotal
}*0.8 - $hostmeminfo->{memused
});
# Value passed as the third argument of compute_alg1: 100 * 1024 * 1024.
158 my $maxchange = 100*1024*1024;
159 my $res = PVE
::AutoBalloon
::compute_alg1
($vmstatus, $goal, $maxchange);
161 &$log("host goal: $goal free: $hostfreemem total: $hostmeminfo->{memtotal}\n");
# Only VMs with a true entry in $res are touched.
163 foreach my $vmid (keys %$vmstatus) {
164 next if !$res->{$vmid};
165 my $d = $vmstatus->{$vmid};
# Difference between the computed target and the current balloon value.
166 my $diff = int($res->{$vmid} - $d->{balloon
});
# Absolute value of $diff.
# NOTE(review): the statement that uses $absdiff is not part of this listing.
167 my $absdiff = $diff < 0 ?
-$diff : $diff;
169 &$log("BALLOON $vmid to $res->{$vmid} ($diff)\n");
# Send the new target to the VM through the monitor "balloon" command.
171 PVE
::QemuServer
::vm_mon_cmd
($vmid, "balloon",
172 value
=> int($res->{$vmid}));
# update_qemu_status($status_cfg)
#
# Reads the status of all QEMU VMs, runs the auto-ballooning step (errors are
# logged, not propagated), broadcasts one RRD record per VM under
# "pve2.3-vm/$vmid" and passes each VM status to every enabled status plugin.
#
# NOTE(review): the declarations of $ctime and $data, the else line between
# the two record variants and the closing braces are not part of this listing.
179 sub update_qemu_status
{
180 my ($status_cfg) = @_;
184 my $vmstatus = PVE
::QemuServer
::vmstatus
(undef, 1);
# A failure in ballooning must not prevent the status update below.
186 eval { auto_balloning
($vmstatus); };
187 syslog
('err', "auto ballooning error: $@") if $@;
189 foreach my $vmid (keys %$vmstatus) {
190 my $d = $vmstatus->{$vmid};
# Prefer qmpstatus, then status, and fall back to 'stopped'.
192 my $status = $d->{qmpstatus
} || $d->{status
} || 'stopped';
# Any false template value is reported as the string "0".
193 my $template = $d->{template
} ?
$d->{template
} : "0";
194 if ($d->{pid
}) { # running
195 $data = $generate_rrd_string->(
196 [$d->{uptime
}, $d->{name
}, $status, $template, $ctime, $d->{cpus
}, $d->{cpu
},
197 $d->{maxmem
}, $d->{mem
}, $d->{maxdisk
}, $d->{disk
},
198 $d->{netin
}, $d->{netout
}, $d->{diskread
}, $d->{diskwrite
}]);
# Second record variant: uptime 0 and undef for cpu, mem, netin, netout,
# diskread and diskwrite.
# NOTE(review): presumably the branch for VMs without a pid - the else line is
# not part of this listing.
200 $data = $generate_rrd_string->(
201 [0, $d->{name
}, $status, $template, $ctime, $d->{cpus
}, undef,
202 $d->{maxmem
}, undef, $d->{maxdisk
}, $d->{disk
}, undef, undef, undef, undef]);
204 PVE
::Cluster
::broadcast_rrd
("pve2.3-vm/$vmid", $data);
# Forward the VM status to every status plugin that is not disabled.
206 foreach my $id (keys %{$status_cfg->{ids
}}) {
207 my $plugin_config = $status_cfg->{ids
}->{$id};
208 next if $plugin_config->{disable
};
209 my $plugin = PVE
::Status
::Plugin-
>lookup($plugin_config->{type
});
210 $plugin->update_qemu_status($plugin_config, $vmid, $d, $ctime, $nodename);
# remove_stale_lxc_consoles()
#
# Walks the lxc-console process ids returned by find_lxc_console_pids() and
# handles those whose container id has no defined entry in the LXC vmstatus
# result.
#
# NOTE(review): the action applied to each pid and the closing braces are not
# part of this listing.
215 sub remove_stale_lxc_consoles
{
217 my $vmstatus = PVE
::LXC
::vmstatus
();
218 my $pidhash = PVE
::LXC
::find_lxc_console_pids
();
220 foreach my $vmid (keys %$pidhash) {
# Containers that still have a status entry are left alone.
221 next if defined($vmstatus->{$vmid});
222 syslog
('info', "remove stale lxc-console for CT $vmid");
223 foreach my $pid (@{$pidhash->{$vmid}}) {
# Per-container error counter, keyed by vmid. It is reset to 0 after a
# successful cpuset write and incremented when a warning is issued, so that a
# repeated failure is only reported once.
229 my $rebalance_error_count = {};
# rebalance_lxc_containers()
#
# Adjusts the cpuset of LXC containers: first applies the configured number of
# cores to each container cgroup, then moves CPUs between containers based on
# a per-CPU container counter.
#
# NOTE(review): several lines (eval/else lines, the declarations of $conf,
# $cpuset, $err, $rest, $cur_cpu and @balanced_cts, and the closing braces)
# are not part of this listing.
231 sub rebalance_lxc_containers
{
233 return if !-d
'/sys/fs/cgroup/cpuset/lxc'; # nothing to do...
# CPUs listed in the 'effective_cpus' set of the 'lxc' cgroup.
235 my $all_cpus = PVE
::CpuSet-
>new_from_cgroup('lxc', 'effective_cpus');
236 my @allowed_cpus = $all_cpus->members();
237 my $cpucount = scalar(@allowed_cpus);
238 my $max_cpuid = PVE
::CpuSet
::max_cpuid
();
# Per-CPU counter of containers using that CPU, initialised with $max_cpuid zeros.
# NOTE(review): the list has indices 0..$max_cpuid-1 while the guard further
# down accepts $cpu <= $max_cpuid - confirm whether that is intended.
240 my @cpu_ctcount = (0) x
$max_cpuid;
# Helper: write $newset to the cgroup(s) of container $vmid.
243 my $modify_cpuset = sub {
244 my ($vmid, $cpuset, $newset) = @_;
# Log the change only while no error is recorded for this container.
246 if (!$rebalance_error_count->{$vmid}) {
247 syslog
('info', "modified cpu set for lxc/$vmid: " .
248 $newset->short_string());
# Containers with an 'ns' sub-cgroup need the parent widened first.
253 if (-d
"/sys/fs/cgroup/cpuset/lxc/$vmid/ns") {
254 # allow all, so that we can set new cpuset in /ns
255 $all_cpus->write_to_cgroup("lxc/$vmid");
257 $newset->write_to_cgroup("lxc/$vmid/ns");
# The post-increment makes this warn only when the counter was still 0.
260 warn $err if !$rebalance_error_count->{$vmid}++;
# NOTE(review): presumably restores the previous set after a failed write - confirm.
262 $cpuset->write_to_cgroup("lxc/$vmid");
264 # also apply to container root cgroup
265 $newset->write_to_cgroup("lxc/$vmid");
266 $rebalance_error_count->{$vmid} = 0;
269 # old style container
270 $newset->write_to_cgroup("lxc/$vmid");
271 $rebalance_error_count->{$vmid} = 0;
275 warn $err if !$rebalance_error_count->{$vmid}++;
279 my $ctlist = PVE
::LXC
::config_list
();
# First pass: containers in sorted vmid order; those without a cpuset cgroup
# directory are skipped.
281 foreach my $vmid (sort keys %$ctlist) {
282 next if ! -d
"/sys/fs/cgroup/cpuset/lxc/$vmid";
287 $conf = PVE
::LXC
::Config-
>load_config($vmid);
289 $cpuset = PVE
::CpuSet-
>new_from_cgroup("lxc/$vmid");
296 my @cpuset_members = $cpuset->members();
# Containers with an explicit lxc.cgroup.cpuset.cpus entry are not resized here.
298 if (!PVE
::LXC
::Config-
>has_lxc_entry($conf, 'lxc.cgroup.cpuset.cpus')) {
# Use the configured core count, defaulting to and capped at the number of allowed CPUs.
300 my $cores = $conf->{cores
} || $cpucount;
301 $cores = $cpucount if $cores > $cpucount;
303 # see if the number of cores was hot-reduced or
304 # hasn't been enacted at all yet
305 my $newset = PVE
::CpuSet-
>new();
# Too many CPUs assigned: keep only the first $cores current members.
306 if ($cores < scalar(@cpuset_members)) {
307 for (my $i = 0; $i < $cores; $i++) {
308 $newset->insert($cpuset_members[$i]);
# Too few CPUs assigned: keep the current members and add allowed CPUs until
# $cores is reached (the loop relies on the return value of insert() as a count).
310 } elsif ($cores > scalar(@cpuset_members)) {
311 my $count = $newset->insert(@cpuset_members);
312 foreach my $cpu (@allowed_cpus) {
313 $count += $newset->insert($cpu);
314 last if $count >= $cores;
# NOTE(review): presumably the branch for an unchanged core count - the else
# line is not part of this listing.
317 $newset->insert(@cpuset_members);
320 # Apply hot-plugged changes if any:
321 if (!$newset->is_equal($cpuset)) {
322 @cpuset_members = $newset->members();
323 $modify_cpuset->($vmid, $cpuset, $newset);
326 # Note: no need to rebalance if we already use all cores
327 push @balanced_cts, [$vmid, $cores, $newset]
328 if defined($conf->{cores
}) && ($cores != $cpucount);
# Count this container on every CPU it currently uses.
331 foreach my $cpu (@cpuset_members) {
332 $cpu_ctcount[$cpu]++ if $cpu <= $max_cpuid;
# Helper: look through $cpulist for a CPU whose counter is lower than the
# counter of $cpu by more than one.
# NOTE(review): the remaining lines of this helper, including its return
# value, are not part of this listing.
336 my $find_best_cpu = sub {
337 my ($cpulist, $cpu) = @_;
339 my $cur_cost = $cpu_ctcount[$cpu];
342 foreach my $candidate (@$cpulist) {
343 my $cost = $cpu_ctcount[$candidate];
344 if ($cost < ($cur_cost -1)) {
346 $cur_cpu = $candidate;
# Second pass: try to move CPUs of the containers collected in @balanced_cts.
353 foreach my $bct (@balanced_cts) {
354 my ($vmid, $cores, $cpuset) = @$bct;
356 my $newset = PVE
::CpuSet-
>new();
# Allowed CPUs that are already part of this container's set are skipped.
359 foreach my $cpu (@allowed_cpus) {
360 next if $cpuset->has($cpu);
# For every current member pick the CPU returned by the helper and update the
# per-CPU counters accordingly.
364 my @members = $cpuset->members();
365 foreach my $cpu (@members) {
366 my $best = &$find_best_cpu($rest, $cpu);
368 $cpu_ctcount[$best]++;
369 $cpu_ctcount[$cpu]--;
371 $newset->insert($best);
# Write the cgroup only when the set actually changed.
374 if (!$newset->is_equal($cpuset)) {
375 $modify_cpuset->($vmid, $cpuset, $newset);
# update_lxc_status($status_cfg)
#
# Reads the status of all LXC containers, broadcasts one RRD record per
# container under "pve2.3-vm/$vmid" and passes each container status to every
# enabled status plugin.
#
# NOTE(review): the declarations of $ctime and $data, the else line between
# the two record variants and the closing braces are not part of this listing.
380 sub update_lxc_status
{
381 my ($status_cfg) = @_;
385 my $vmstatus = PVE
::LXC
::vmstatus
();
387 foreach my $vmid (keys %$vmstatus) {
388 my $d = $vmstatus->{$vmid};
# Any false template value is reported as the string "0".
389 my $template = $d->{template
} ?
$d->{template
} : "0";
391 if ($d->{status
} eq 'running') { # running
392 $data = $generate_rrd_string->(
393 [$d->{uptime
}, $d->{name
}, $d->{status
}, $template,
394 $ctime, $d->{cpus
}, $d->{cpu
},
395 $d->{maxmem
}, $d->{mem
},
396 $d->{maxdisk
}, $d->{disk
},
397 $d->{netin
}, $d->{netout
},
398 $d->{diskread
}, $d->{diskwrite
}]);
# Second record variant: uptime 0 and undef for cpu, mem, netin, netout,
# diskread and diskwrite.
# NOTE(review): presumably the branch for containers that are not running -
# the else line is not part of this listing.
400 $data = $generate_rrd_string->(
401 [0, $d->{name
}, $d->{status
}, $template, $ctime, $d->{cpus
}, undef,
402 $d->{maxmem
}, undef, $d->{maxdisk
}, $d->{disk
}, undef, undef, undef, undef]);
404 PVE
::Cluster
::broadcast_rrd
("pve2.3-vm/$vmid", $data);
# Forward the container status to every status plugin that is not disabled.
406 foreach my $id (keys %{$status_cfg->{ids
}}) {
407 my $plugin_config = $status_cfg->{ids
}->{$id};
408 next if $plugin_config->{disable
};
409 my $plugin = PVE
::Status
::Plugin-
>lookup($plugin_config->{type
});
410 $plugin->update_lxc_status($plugin_config, $vmid, $d, $ctime, $nodename);
# update_storage_status($status_cfg)
#
# Reads the storage configuration and usage information, broadcasts one RRD
# record (time, total, used) per active storage under
# "pve2-storage/$nodename/$storeid" and passes each storage entry to every
# enabled status plugin.
#
# NOTE(review): the declaration of $ctime and the closing braces are not part
# of this listing.
415 sub update_storage_status
{
416 my ($status_cfg) = @_;
418 my $cfg = PVE
::Storage
::config
();
422 my $info = PVE
::Storage
::storage_info
($cfg);
424 foreach my $storeid (keys %$info) {
425 my $d = $info->{$storeid};
# Storages that are not active are skipped entirely.
426 next if !$d->{active
};
428 my $data = $generate_rrd_string->([$ctime, $d->{total
}, $d->{used
}]);
430 my $key = "pve2-storage/${nodename}/$storeid";
431 PVE
::Cluster
::broadcast_rrd
($key, $data);
# Forward the storage entry to every status plugin that is not disabled.
433 foreach my $id (keys %{$status_cfg->{ids
}}) {
434 my $plugin_config = $status_cfg->{ids
}->{$id};
435 next if $plugin_config->{disable
};
436 my $plugin = PVE
::Status
::Plugin-
>lookup($plugin_config->{type
});
437 $plugin->update_storage_status($plugin_config, $nodename, $storeid, $d, $ctime);
# One status update pass: broadcast the active worker list, read 'status.cfg'
# and then run each updater in turn, logging a failure of each step with its
# own message.
# NOTE(review): the enclosing sub header, the eval wrappers and the statements
# that set $err are not part of this listing.
444 # update worker list. This is not really required and
445 # we just call this to make sure that we have a correct
446 # list in case of an unexpected crash.
447 my $rpcenv = PVE
::RPCEnvironment
::get
();
450 my $tlist = $rpcenv->active_workers();
451 PVE
::Cluster
::broadcast_tasklist
($tlist);
454 syslog
('err', $err) if $err;
# Status plugin configuration shared by all updaters below.
456 my $status_cfg = PVE
::Cluster
::cfs_read_file
('status.cfg');
459 update_node_status
($status_cfg);
462 syslog
('err', "node status update error: $err") if $err;
465 update_qemu_status
($status_cfg);
468 syslog
('err', "qemu status update error: $err") if $err;
471 update_lxc_status
($status_cfg);
474 syslog
('err', "lxc status update error: $err") if $err;
477 rebalance_lxc_containers
();
480 syslog
('err', "lxc cpuset rebalance error: $err") if $err;
483 update_storage_status
($status_cfg);
486 syslog
('err', "storage status update error: $err") if $err;
489 remove_stale_lxc_consoles
();
492 syslog
('err', "lxc console cleanup error: $err") if $err;
# Periodic update loop: schedules the next update, times one status update
# pass, restarts the daemon when its resident memory has grown too much, and
# then waits for the next update time or a restart request.
# NOTE(review): the enclosing sub header, the loop construct and the
# declarations of $next_update, $updatetime, $cycle, $wcount and
# $restart_request are not part of this listing.
497 # do not update directly after startup, because install scripts
498 # have a problem with that
# Baseline for the memory growth check further down.
502 my $initial_memory_usage;
509 $next_update = time() + $updatetime;
# Start timestamp (seconds, microseconds) of this update pass.
512 my ($ccsec, $cusec) = gettimeofday
();
514 # syslog('info', "start status update");
515 PVE
::Cluster
::cfs_update
();
521 syslog
('err', "status update error: $err");
# Elapsed time of the pass in seconds, with the microsecond part as a fraction.
524 my ($ccsec_end, $cusec_end) = gettimeofday
();
525 my $cptime = ($ccsec_end-$ccsec) + ($cusec_end - $cusec)/1000000;
# NOTE(review): the end of this statement (presumably a condition on $cptime)
# is not part of this listing.
527 syslog
('info', sprintf("status update time (%.3f seconds)", $cptime))
533 my $mem = PVE
::ProcFSTools
::read_memory_usage
();
# The baseline is (re)set while it is undefined or $cycle is below 10.
535 if (!defined($initial_memory_usage) || ($cycle < 10)) {
536 $initial_memory_usage = $mem->{resident
};
# Restart the daemon once resident memory exceeds the baseline by more than
# 5 * 1024 * 1024.
538 my $diff = $mem->{resident
} - $initial_memory_usage;
539 if ($diff > 5*1024*1024) {
540 syslog
('info', "restarting server after $cycle cycles to " .
541 "reduce memory usage (free $mem->{resident} ($diff) bytes)");
542 $self->restart_daemon();
# Sleep in one-second steps so that a restart request is noticed quickly.
547 while ((time() < $next_update) &&
548 ($wcount < $updatetime) && # protect against time wrap
549 !$restart_request) { $wcount++; sleep (1); };
551 $self->restart_daemon() if $restart_request;
# Register the start, restart, stop and status commands on the daemon object.
# NOTE(review): the meaning of the argument 1 passed to
# register_restart_command is defined by PVE::Daemon - confirm there.
555 $daemon->register_start_command();
556 $daemon->register_restart_command(1);
557 $daemon->register_stop_command();
558 $daemon->register_status_command();
# Command table entries: each command name maps to this package and a method
# name; the 'status' entry additionally supplies a callback that prints its
# first argument followed by a newline.
# NOTE(review): the statement that opens this table and its closing are not
# part of this listing.
561 start
=> [ __PACKAGE__
, 'start', []],
562 restart
=> [ __PACKAGE__
, 'restart', []],
563 stop
=> [ __PACKAGE__
, 'stop', []],
564 status
=> [ __PACKAGE__
, 'status', [], undef, sub { print shift . "\n";} ],