package PVE::Service::pvestatd;

use strict;
use warnings;

use PVE::SafeSyslog;
use PVE::Daemon;

use POSIX (); # POSIX::WNOHANG(), used in shutdown()
use Time::HiRes qw(gettimeofday);
use PVE::Tools qw(dir_glob_foreach file_read_firstline);
use PVE::ProcFSTools;
use PVE::CpuSet;
use Filesys::Df;
use PVE::INotify;
use PVE::Cluster qw(cfs_read_file);
use PVE::Storage;
use PVE::QemuServer;
use PVE::LXC;
use PVE::LXC::Config;
use PVE::RPCEnvironment;
use PVE::API2::Subscription;
use PVE::AutoBalloon;

use PVE::Status::Plugin;
use PVE::Status::Graphite;
use PVE::Status::InfluxDB;

PVE::Status::Graphite->register();
PVE::Status::InfluxDB->register();
PVE::Status::Plugin->init();

use base qw(PVE::Daemon);

my $opt_debug;
my $restart_request;

my $nodename = PVE::INotify::nodename();

my $cmdline = [$0, @ARGV];

my %daemon_options = (restart_on_error => 5, stop_wait_time => 5);
my $daemon = __PACKAGE__->new('pvestatd', $cmdline, %daemon_options);

sub init {
    my ($self) = @_;

    $opt_debug = $self->{debug};

    PVE::Cluster::cfs_update();
}

sub shutdown {
    my ($self) = @_;

    syslog('info', "server closing");

    # wait for children
    1 while (waitpid(-1, POSIX::WNOHANG()) > 0);

    $self->exit_daemon(0);
}

sub hup {
    my ($self) = @_;

    $restart_request = 1;
}

my $generate_rrd_string = sub {
    my ($data) = @_;

    return join(':', map { $_ // 'U' } @$data);
};
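
# Example (illustrative, not from the source): $generate_rrd_string->([100, undef, 3])
# returns "100:U:3" -- undefined fields are encoded as 'U', RRDtool's "unknown" marker.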

sub update_node_status {
    my ($status_cfg) = @_;

    my ($avg1, $avg5, $avg15) = PVE::ProcFSTools::read_loadavg();

    my $stat = PVE::ProcFSTools::read_proc_stat();

    my $netdev = PVE::ProcFSTools::read_proc_net_dev();

    my ($uptime) = PVE::ProcFSTools::read_proc_uptime();

    my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();

    my $maxcpu = $cpuinfo->{cpus};

    my $subinfo = PVE::INotify::read_file('subscription');
    my $sublevel = $subinfo->{level} || '';

    # traffic from/to physical interface cards
    my $netin = 0;
    my $netout = 0;
    foreach my $dev (keys %$netdev) {
        next if $dev !~ m/^eth\d+$/;
        $netin += $netdev->{$dev}->{receive};
        $netout += $netdev->{$dev}->{transmit};
    }
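    # Note: only NICs matching ethX are summed here; other devices reported by
    # /proc/net/dev (bridges such as vmbr0, bonds, VLAN interfaces) are skipped
    # by the regex above.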

    my $meminfo = PVE::ProcFSTools::read_meminfo();

    my $dinfo = df('/', 1); # output is bytes

    my $ctime = time();

    # everything not free is considered to be used
    my $dused = $dinfo->{blocks} - $dinfo->{bfree};

    my $data = $generate_rrd_string->(
        [$uptime, $sublevel, $ctime, $avg1, $maxcpu, $stat->{cpu}, $stat->{wait},
         $meminfo->{memtotal}, $meminfo->{memused},
         $meminfo->{swaptotal}, $meminfo->{swapused},
         $dinfo->{blocks}, $dused, $netin, $netout]);

    PVE::Cluster::broadcast_rrd("pve2-node/$nodename", $data);

    foreach my $id (keys %{$status_cfg->{ids}}) {
        my $plugin_config = $status_cfg->{ids}->{$id};
        next if $plugin_config->{disable};
        my $plugin = PVE::Status::Plugin->lookup($plugin_config->{type});

        my $d = {};
        $d->{uptime} = $uptime;
        $d->{cpustat} = $stat;
        $d->{cpustat}->{avg1} = $avg1;
        $d->{cpustat}->{avg5} = $avg5;
        $d->{cpustat}->{avg15} = $avg15;
        $d->{cpustat}->{cpus} = $maxcpu;
        $d->{memory} = $meminfo;
        $d->{blockstat} = $dinfo;
        $d->{nics} = $netdev;

        $plugin->update_node_status($plugin_config, $nodename, $d, $ctime);
    }
}

sub auto_ballooning {
    my ($vmstatus) = @_;

    my $log = sub {
        return if !$opt_debug;
        print @_;
    };

    my $hostmeminfo = PVE::ProcFSTools::read_meminfo();

    # to debug, run 'pvestatd -d' and set memtotal here
    #$hostmeminfo->{memtotal} = int(2*1024*1024*1024/0.8); # you can set this to test

    my $hostfreemem = $hostmeminfo->{memtotal} - $hostmeminfo->{memused};

    # we try to use about 80% of the host memory;
    # goal: the amount by which we want to change memory usage (positive or negative)
    my $goal = int($hostmeminfo->{memtotal}*0.8 - $hostmeminfo->{memused});
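    # Illustrative numbers (not from the source): with memtotal = 64 GiB and
    # memused = 45 GiB, the target is 0.8 * 64 = 51.2 GiB, so $goal is roughly
    # +6.2 GiB that compute_alg1() below may distribute to the ballooning
    # guests; a negative $goal asks them to give memory back instead.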

    my $maxchange = 100*1024*1024;
    my $res = PVE::AutoBalloon::compute_alg1($vmstatus, $goal, $maxchange);

    &$log("host goal: $goal free: $hostfreemem total: $hostmeminfo->{memtotal}\n");

    foreach my $vmid (keys %$vmstatus) {
        next if !$res->{$vmid};
        my $d = $vmstatus->{$vmid};
        my $diff = int($res->{$vmid} - $d->{balloon});
        my $absdiff = $diff < 0 ? -$diff : $diff;
        if ($absdiff > 0) {
            &$log("BALLOON $vmid to $res->{$vmid} ($diff)\n");
            eval {
                PVE::QemuServer::vm_mon_cmd($vmid, "balloon",
                                            value => int($res->{$vmid}));
            };
            warn $@ if $@;
        }
    }
}

sub update_qemu_status {
    my ($status_cfg) = @_;

    my $ctime = time();

    my $vmstatus = PVE::QemuServer::vmstatus(undef, 1);

    eval { auto_ballooning($vmstatus); };
    syslog('err', "auto ballooning error: $@") if $@;

    foreach my $vmid (keys %$vmstatus) {
        my $d = $vmstatus->{$vmid};
        my $data;
        my $status = $d->{qmpstatus} || $d->{status} || 'stopped';
        my $template = $d->{template} ? $d->{template} : "0";
        if ($d->{pid}) { # running
            $data = $generate_rrd_string->(
                [$d->{uptime}, $d->{name}, $status, $template, $ctime, $d->{cpus}, $d->{cpu},
                 $d->{maxmem}, $d->{mem}, $d->{maxdisk}, $d->{disk},
                 $d->{netin}, $d->{netout}, $d->{diskread}, $d->{diskwrite}]);
        } else {
            $data = $generate_rrd_string->(
                [0, $d->{name}, $status, $template, $ctime, $d->{cpus}, undef,
                 $d->{maxmem}, undef, $d->{maxdisk}, $d->{disk}, undef, undef, undef, undef]);
        }
        PVE::Cluster::broadcast_rrd("pve2.3-vm/$vmid", $data);

        foreach my $id (keys %{$status_cfg->{ids}}) {
            my $plugin_config = $status_cfg->{ids}->{$id};
            next if $plugin_config->{disable};
            my $plugin = PVE::Status::Plugin->lookup($plugin_config->{type});
            $plugin->update_qemu_status($plugin_config, $vmid, $d, $ctime, $nodename);
        }
    }
}

sub remove_stale_lxc_consoles {

    my $vmstatus = PVE::LXC::vmstatus();
    my $pidhash = PVE::LXC::find_lxc_console_pids();

    foreach my $vmid (keys %$pidhash) {
        next if defined($vmstatus->{$vmid});
        syslog('info', "remove stale lxc-console for CT $vmid");
        foreach my $pid (@{$pidhash->{$vmid}}) {
            kill(9, $pid);
        }
    }
}

my $rebalance_error_count = {};

sub rebalance_lxc_containers {

    return if !-d '/sys/fs/cgroup/cpuset/lxc'; # nothing to do...
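    # Note: this checks the cgroup v1 cpuset hierarchy; if that path is not
    # present the rebalancer is simply a no-op.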

    my $all_cpus = PVE::CpuSet->new_from_cgroup('lxc', 'effective_cpus');
    my @allowed_cpus = $all_cpus->members();
    my $cpucount = scalar(@allowed_cpus);
    my $max_cpuid = PVE::CpuSet::max_cpuid();

    my @cpu_ctcount = (0) x $max_cpuid;
    my @balanced_cts;

    my $modify_cpuset = sub {
        my ($vmid, $cpuset, $newset) = @_;

        if (!$rebalance_error_count->{$vmid}) {
            syslog('info', "modified cpu set for lxc/$vmid: " .
                   $newset->short_string());
        }

        eval {

            if (-d "/sys/fs/cgroup/cpuset/lxc/$vmid/ns") {
                # allow all, so that we can set new cpuset in /ns
                $all_cpus->write_to_cgroup("lxc/$vmid");
                eval {
                    $newset->write_to_cgroup("lxc/$vmid/ns");
                };
                if (my $err = $@) {
                    warn $err if !$rebalance_error_count->{$vmid}++;
                    # restore original
                    $cpuset->write_to_cgroup("lxc/$vmid");
                } else {
                    # also apply to container root cgroup
                    $newset->write_to_cgroup("lxc/$vmid");
                    $rebalance_error_count->{$vmid} = 0;
                }
            } else {
                # old style container
                $newset->write_to_cgroup("lxc/$vmid");
                $rebalance_error_count->{$vmid} = 0;
            }
        };
        if (my $err = $@) {
            warn $err if !$rebalance_error_count->{$vmid}++;
        }
    };

    my $ctlist = PVE::LXC::config_list();

    foreach my $vmid (sort keys %$ctlist) {
        next if ! -d "/sys/fs/cgroup/cpuset/lxc/$vmid";

        my ($conf, $cpuset);
        eval {

            $conf = PVE::LXC::Config->load_config($vmid);

            $cpuset = PVE::CpuSet->new_from_cgroup("lxc/$vmid");
        };
        if (my $err = $@) {
            warn $err;
            next;
        }

        my @cpuset_members = $cpuset->members();

        if (!PVE::LXC::Config->has_lxc_entry($conf, 'lxc.cgroup.cpuset.cpus')) {

            my $cores = $conf->{cores} || $cpucount;
            $cores = $cpucount if $cores > $cpucount;

            # see if the number of cores was hot-reduced or
            # hasn't been enacted at all yet
            my $newset = PVE::CpuSet->new();
            if ($cores < scalar(@cpuset_members)) {
                for (my $i = 0; $i < $cores; $i++) {
                    $newset->insert($cpuset_members[$i]);
                }
            } elsif ($cores > scalar(@cpuset_members)) {
                my $count = $newset->insert(@cpuset_members);
                foreach my $cpu (@allowed_cpus) {
                    $count += $newset->insert($cpu);
                    last if $count >= $cores;
                }
            } else {
                $newset->insert(@cpuset_members);
            }

            # Apply hot-plugged changes if any:
            if (!$newset->is_equal($cpuset)) {
                @cpuset_members = $newset->members();
                $modify_cpuset->($vmid, $cpuset, $newset);
            }

            # Note: no need to rebalance if we already use all cores
            push @balanced_cts, [$vmid, $cores, $newset]
                if defined($conf->{cores}) && ($cores != $cpucount);
        }

        foreach my $cpu (@cpuset_members) {
            $cpu_ctcount[$cpu]++ if $cpu <= $max_cpuid;
        }
    }

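    # $find_best_cpu picks, for a container currently pinned to $cpu, the least
    # loaded CPU from $cpulist (load = number of containers per CPU, tracked in
    # @cpu_ctcount). A move only happens if the candidate is at least two
    # containers cheaper, which avoids ping-ponging between CPUs of nearly
    # equal load. Illustrative example (made-up numbers): with 4 containers on
    # CPU 3 and 2 on CPU 7, a container moves from CPU 3 to CPU 7; with 3 on
    # CPU 7 it stays where it is.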
    my $find_best_cpu = sub {
        my ($cpulist, $cpu) = @_;

        my $cur_cost = $cpu_ctcount[$cpu];
        my $cur_cpu = $cpu;

        foreach my $candidate (@$cpulist) {
            my $cost = $cpu_ctcount[$candidate];
            if ($cost < ($cur_cost - 1)) {
                $cur_cost = $cost;
                $cur_cpu = $candidate;
            }
        }

        return $cur_cpu;
    };

    foreach my $bct (@balanced_cts) {
        my ($vmid, $cores, $cpuset) = @$bct;

        my $newset = PVE::CpuSet->new();

        my $rest = [];
        foreach my $cpu (@allowed_cpus) {
            next if $cpuset->has($cpu);
            push @$rest, $cpu;
        }

        my @members = $cpuset->members();
        foreach my $cpu (@members) {
            my $best = &$find_best_cpu($rest, $cpu);
            if ($best != $cpu) {
                $cpu_ctcount[$best]++;
                $cpu_ctcount[$cpu]--;
            }
            $newset->insert($best);
        }

        if (!$newset->is_equal($cpuset)) {
            $modify_cpuset->($vmid, $cpuset, $newset);
        }
    }
}

sub update_lxc_status {
    my ($status_cfg) = @_;

    my $ctime = time();

    my $vmstatus = PVE::LXC::vmstatus();

    foreach my $vmid (keys %$vmstatus) {
        my $d = $vmstatus->{$vmid};
        my $template = $d->{template} ? $d->{template} : "0";
        my $data;
        if ($d->{status} eq 'running') { # running
            $data = $generate_rrd_string->(
                [$d->{uptime}, $d->{name}, $d->{status}, $template,
                 $ctime, $d->{cpus}, $d->{cpu},
                 $d->{maxmem}, $d->{mem},
                 $d->{maxdisk}, $d->{disk},
                 $d->{netin}, $d->{netout},
                 $d->{diskread}, $d->{diskwrite}]);
        } else {
            $data = $generate_rrd_string->(
                [0, $d->{name}, $d->{status}, $template, $ctime, $d->{cpus}, undef,
                 $d->{maxmem}, undef, $d->{maxdisk}, $d->{disk}, undef, undef, undef, undef]);
        }
        PVE::Cluster::broadcast_rrd("pve2.3-vm/$vmid", $data);

        foreach my $id (keys %{$status_cfg->{ids}}) {
            my $plugin_config = $status_cfg->{ids}->{$id};
            next if $plugin_config->{disable};
            my $plugin = PVE::Status::Plugin->lookup($plugin_config->{type});
            $plugin->update_lxc_status($plugin_config, $vmid, $d, $ctime, $nodename);
        }
    }
}

sub update_storage_status {
    my ($status_cfg) = @_;

    my $cfg = PVE::Storage::config();

    my $ctime = time();

    my $info = PVE::Storage::storage_info($cfg);

    foreach my $storeid (keys %$info) {
        my $d = $info->{$storeid};
        next if !$d->{active};

        my $data = $generate_rrd_string->([$ctime, $d->{total}, $d->{used}]);

        my $key = "pve2-storage/${nodename}/$storeid";
        PVE::Cluster::broadcast_rrd($key, $data);

        foreach my $id (keys %{$status_cfg->{ids}}) {
            my $plugin_config = $status_cfg->{ids}->{$id};
            next if $plugin_config->{disable};
            my $plugin = PVE::Status::Plugin->lookup($plugin_config->{type});
            $plugin->update_storage_status($plugin_config, $nodename, $storeid, $d, $ctime);
        }
    }
}

sub update_status {

    # Update the worker list. This is not strictly required; we just call it
    # to make sure we have a correct list in case of an unexpected crash.
    my $rpcenv = PVE::RPCEnvironment::get();

    eval {
        my $tlist = $rpcenv->active_workers();
        PVE::Cluster::broadcast_tasklist($tlist);
    };
    my $err = $@;
    syslog('err', $err) if $err;

    my $status_cfg = PVE::Cluster::cfs_read_file('status.cfg');
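    # status.cfg describes the optional external metric servers. Each entry in
    # $status_cfg->{ids} carries at least a 'type' (matching one of the
    # Graphite/InfluxDB plugins registered at the top of this module) plus
    # plugin-specific options; entries with 'disable' set are skipped by the
    # update_* subs above. The exact option names live in the PVE::Status::*
    # plugins, not here.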

    eval {
        update_node_status($status_cfg);
    };
    $err = $@;
    syslog('err', "node status update error: $err") if $err;

    eval {
        update_qemu_status($status_cfg);
    };
    $err = $@;
    syslog('err', "qemu status update error: $err") if $err;

    eval {
        update_lxc_status($status_cfg);
    };
    $err = $@;
    syslog('err', "lxc status update error: $err") if $err;

    eval {
        rebalance_lxc_containers();
    };
    $err = $@;
    syslog('err', "lxc cpuset rebalance error: $err") if $err;

    eval {
        update_storage_status($status_cfg);
    };
    $err = $@;
    syslog('err', "storage status update error: $err") if $err;

    eval {
        remove_stale_lxc_consoles();
    };
    $err = $@;
    syslog('err', "lxc console cleanup error: $err") if $err;
}

my $next_update = 0;

# do not update directly after startup, because install scripts
# have a problem with that
my $cycle = 0;
my $updatetime = 10;

my $initial_memory_usage;

sub run {
    my ($self) = @_;

    for (;;) { # forever

        $next_update = time() + $updatetime;

        if ($cycle) {
            my ($ccsec, $cusec) = gettimeofday();
            eval {
                # syslog('info', "start status update");
                PVE::Cluster::cfs_update();
                update_status();
            };
            my $err = $@;

            if ($err) {
                syslog('err', "status update error: $err");
            }

            my ($ccsec_end, $cusec_end) = gettimeofday();
            my $cptime = ($ccsec_end - $ccsec) + ($cusec_end - $cusec)/1000000;

            syslog('info', sprintf("status update time (%.3f seconds)", $cptime))
                if ($cptime > 5);
        }

        $cycle++;

        my $mem = PVE::ProcFSTools::read_memory_usage();

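        # Memory-growth watchdog. Illustrative numbers (not from the source):
        # if the resident set size settled around 40 MiB during the first
        # cycles and later exceeds 45 MiB (more than 5 MiB above the recorded
        # baseline), the daemon restarts itself to shed the extra memory.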
        if (!defined($initial_memory_usage) || ($cycle < 10)) {
            $initial_memory_usage = $mem->{resident};
        } else {
            my $diff = $mem->{resident} - $initial_memory_usage;
            if ($diff > 5*1024*1024) {
                syslog('info', "restarting server after $cycle cycles to " .
                       "reduce memory usage (resident $mem->{resident} bytes, $diff bytes above baseline)");
                $self->restart_daemon();
            }
        }

        my $wcount = 0;
        while ((time() < $next_update) &&
               ($wcount < $updatetime) && # protect against time wrap
               !$restart_request) { $wcount++; sleep(1); };

        $self->restart_daemon() if $restart_request;
    }
}

$daemon->register_start_command();
$daemon->register_restart_command(1);
$daemon->register_stop_command();
$daemon->register_status_command();

our $cmddef = {
    start => [ __PACKAGE__, 'start', []],
    restart => [ __PACKAGE__, 'restart', []],
    stop => [ __PACKAGE__, 'stop', []],
    status => [ __PACKAGE__, 'status', [], undef, sub { print shift . "\n"; } ],
};
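
# $cmddef is the subcommand table for the command-line front-end (assumption:
# it is consumed by the standard PVE CLI handler, as in other PVE daemons).
# Typical invocations, for illustration:
#
#   pvestatd start
#   pvestatd status
#   pvestatd restart
#   pvestatd stop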

1;