]> git.proxmox.com Git - pve-manager.git/blame - PVE/Service/pvestatd.pm
bump version to 6.0-12
[pve-manager.git] / PVE / Service / pvestatd.pm
CommitLineData
efd04666
DM
1package PVE::Service::pvestatd;
2
3use strict;
4use warnings;
5
6use PVE::SafeSyslog;
7use PVE::Daemon;
8
fea39196
DC
9use JSON;
10
efd04666
DM
11use Time::HiRes qw (gettimeofday);
12use PVE::Tools qw(dir_glob_foreach file_read_firstline);
13use PVE::ProcFSTools;
41db757b 14use PVE::CpuSet;
efd04666
DM
15use Filesys::Df;
16use PVE::INotify;
0fcced16 17use PVE::Network;
efd04666
DM
18use PVE::Cluster qw(cfs_read_file);
19use PVE::Storage;
20use PVE::QemuServer;
21use PVE::LXC;
41db757b 22use PVE::LXC::Config;
efd04666
DM
23use PVE::RPCEnvironment;
24use PVE::API2::Subscription;
25use PVE::AutoBalloon;
5ea29d13 26use PVE::AccessControl;
fea39196
DC
27use PVE::Ceph::Services;
28use PVE::Ceph::Tools;
efd04666 29
f1f4bfef 30use PVE::ExtMetric;
efd04666 31use PVE::Status::Plugin;
efd04666
DM
32
33use base qw(PVE::Daemon);
34
a36565ba
AD
# SDN support is optional: try to load the SDN API module and remember
# whether that worked. $have_sdn stays undef when the module cannot be loaded.
my $have_sdn = eval {
    require PVE::API2::Network::SDN;
    1;
};
40
efd04666
DM
# File-scoped daemon state:
#   $opt_debug       - copied from the daemon object in init()
#   $restart_request - set by hup(), polled by the main loop in run()
my ($opt_debug, $restart_request);

# name of the local node, used in all RRD keys and metric updates
my $nodename = PVE::INotify::nodename();

# original invocation, handed to the daemon object
my $cmdline = [$0, @ARGV];

my %daemon_options = (
    restart_on_error => 5,
    stop_wait_time => 5,
);

my $daemon = __PACKAGE__->new('pvestatd', $cmdline, %daemon_options);
50
# Daemon init hook: remember the debug flag of the daemon object in file
# scope and refresh the cluster file system state once.
sub init {
    my ($self) = @_;

    # plain helper subs (e.g. auto_balloning) consult this file-scoped copy
    $opt_debug = $self->{debug};

    PVE::Cluster::cfs_update();
}
58
# Daemon shutdown hook: log the shutdown, reap already-exited children and
# leave through exit_daemon(0).
sub shutdown {
    my ($self) = @_;

    syslog('info', "server closing");

    # collect every child that has already terminated, without blocking
    while (waitpid(-1, POSIX::WNOHANG()) > 0) {
        # nothing to do - the waitpid call itself reaps the child
    }

    $self->exit_daemon(0);
}
69
# Daemon HUP hook: only flag the restart request here; the main loop in
# run() checks the flag and performs the actual restart.
sub hup {
    my ($self) = @_;

    $restart_request = 1;
}
75
50786956
DM
# Turn an array reference of values into a single ':'-separated record.
# Undefined entries are written as 'U'; defined entries (including 0 and
# the empty string) are passed through unchanged.
my $generate_rrd_string = sub {
    my ($data) = @_;

    my @fields;
    for my $value (@$data) {
        push @fields, defined($value) ? $value : 'U';
    }

    return join(':', @fields);
};
81
efd04666
DM
# Gather host-wide statistics (uptime, load, CPU, memory, root file system,
# network traffic), broadcast them as RRD record "pve2-node/<nodename>" and
# pass the raw data on to the external metric plugins.
#
# $status_cfg - configuration handed through to PVE::ExtMetric::update_all()
sub update_node_status {
    my ($status_cfg) = @_;

    my ($uptime) = PVE::ProcFSTools::read_proc_uptime();

    my ($avg1, $avg5, $avg15) = PVE::ProcFSTools::read_loadavg();
    my $cpustat = PVE::ProcFSTools::read_proc_stat();
    my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();
    my $maxcpu = $cpuinfo->{cpus};

    my $subinfo = PVE::INotify::read_file('subscription');
    my $sublevel = $subinfo->{level} || '';

    my $netdev = PVE::ProcFSTools::read_proc_net_dev();

    # traffic from/to physical interface cards
    my $netin = 0;
    my $netout = 0;
    foreach my $ifname (keys %$netdev) {
        next if $ifname !~ /^$PVE::Network::PHYSICAL_NIC_RE$/;

        my $counters = $netdev->{$ifname};
        $netin += $counters->{receive};
        $netout += $counters->{transmit};
    }

    my $meminfo = PVE::ProcFSTools::read_meminfo();

    my $dinfo = df('/', 1); # output is bytes
    # everything not free is considered to be used
    my $dused = $dinfo->{blocks} - $dinfo->{bfree};

    my $ctime = time();

    my @rrd_fields = (
        $uptime, $sublevel, $ctime, $avg1, $maxcpu,
        $cpustat->{cpu}, $cpustat->{wait},
        $meminfo->{memtotal}, $meminfo->{memused},
        $meminfo->{swaptotal}, $meminfo->{swapused},
        $dinfo->{blocks}, $dused,
        $netin, $netout,
    );
    my $data = $generate_rrd_string->(\@rrd_fields);
    PVE::Cluster::broadcast_rrd("pve2-node/$nodename", $data);

    # the external metrics additionally get the load averages and the CPU
    # count; they are added to the CPU statistics hash itself
    @{$cpustat}{qw(avg1 avg5 avg15)} = ($avg1, $avg5, $avg15);
    $cpustat->{cpus} = $maxcpu;

    my $node_metric = {
        uptime => $uptime,
        cpustat => $cpustat,
        memory => $meminfo,
        blockstat => $dinfo,
        nics => $netdev,
    };

    PVE::ExtMetric::update_all($status_cfg, 'node', $nodename, $node_metric, $ctime);
}
131
# Adjust the balloon target of the guests so that the host ends up using
# roughly 80% of its memory.
#
# $vmstatus - hash reference (vmid => status hash); the 'balloon' entry of
#             each status hash is read as the current balloon value
#
# The new targets come from PVE::AutoBalloon::compute_alg1(); errors of a
# single balloon command are only warned about.
sub auto_balloning {
    my ($vmstatus) = @_;

    # debug output, only printed when the daemon runs in debug mode
    my $log = sub {
        printf @_ if $opt_debug;
    };

    my $hostmeminfo = PVE::ProcFSTools::read_meminfo();
    # NOTE: to debug, run 'pvestatd -d' and set memtotal here
    #$hostmeminfo->{memtotal} = int(2*1024*1024*1024/0.8); # you can set this to test
    my ($memtotal, $memused) = @{$hostmeminfo}{qw(memtotal memused)};

    my $hostfreemem = $memtotal - $memused;

    # try to use ~80% host memory; goal is the change amount required to achieve that
    my $goal = int($memtotal * 0.8 - $memused);
    $log->("host goal: $goal free: $hostfreemem total: $memtotal\n");

    # upper bound passed to the algorithm: 100 MiB
    my $maxchange = 100*1024*1024;
    my $res = PVE::AutoBalloon::compute_alg1($vmstatus, $goal, $maxchange);

    foreach my $vmid (sort keys %$res) {
        my $target = int($res->{$vmid});
        my $current = int($vmstatus->{$vmid}->{balloon});
        next if $target == $current; # no need to change

        my $delta = $target - $current;
        $log->("BALLOON $vmid to $target (%d)\n", $delta);

        eval { PVE::QemuServer::vm_mon_cmd($vmid, "balloon", value => $target) };
        warn $@ if $@;
    }
}
159
# Publish the status of all QEMU guests: run auto ballooning first, then
# broadcast one RRD record "pve2.3-vm/<vmid>" per guest and forward the
# status hash to the external metric plugins.
#
# $status_cfg - configuration handed through to PVE::ExtMetric::update_all()
sub update_qemu_status {
    my ($status_cfg) = @_;

    my $ctime = time();

    my $vmstatus = PVE::QemuServer::vmstatus(undef, 1);

    # ballooning problems must not prevent the status update itself
    eval { auto_balloning($vmstatus); };
    syslog('err', "auto ballooning error: $@") if $@;

    foreach my $vmid (keys %$vmstatus) {
        my $d = $vmstatus->{$vmid};
        my $status = $d->{qmpstatus} || $d->{status} || 'stopped';
        my $template = $d->{template} || "0";

        # a guest with a pid is running; for all others the uptime is 0 and
        # the runtime counters are reported as unknown
        my ($uptime, $cpu, $mem, $netin, $netout, $diskread, $diskwrite) = $d->{pid}
            ? @{$d}{qw(uptime cpu mem netin netout diskread diskwrite)}
            : (0, (undef) x 6);

        my $data = $generate_rrd_string->([
            $uptime, $d->{name}, $status, $template, $ctime, $d->{cpus}, $cpu,
            $d->{maxmem}, $mem, $d->{maxdisk}, $d->{disk},
            $netin, $netout, $diskread, $diskwrite,
        ]);
        PVE::Cluster::broadcast_rrd("pve2.3-vm/$vmid", $data);

        PVE::ExtMetric::update_all($status_cfg, 'qemu', $vmid, $d, $ctime, $nodename);
    }
}
190
# Kill lxc-console processes whose container no longer shows up in the LXC
# status list.
sub remove_stale_lxc_consoles {

    my $vmstatus = PVE::LXC::vmstatus();
    my $pidhash = PVE::LXC::find_lxc_console_pids();

    # only consoles of containers without a status entry are stale
    my @stale_vmids = grep { !defined($vmstatus->{$_}) } keys %$pidhash;

    for my $vmid (@stale_vmids) {
        syslog('info', "remove stale lxc-console for CT $vmid");
        for my $pid (@{$pidhash->{$vmid}}) {
            kill(9, $pid);
        }
    }
}
204
b3f1adb2
DM
# Per-container count of consecutive failed cpuset updates. It is used to
# log/warn only on the first failure and is reset to 0 on success.
my $rebalance_error_count = {};

# Distribute the containers with a 'cores' limit over the CPUs allowed for
# the 'lxc' cpuset cgroup.
#
# First pass: for every container without an explicit
# 'lxc.cgroup.cpuset.cpus' entry, grow or shrink its cpuset to the
# configured number of cores and count how many containers use each CPU.
# Second pass: move limited containers from crowded CPUs to less used ones.
sub rebalance_lxc_containers {

    return if !-d '/sys/fs/cgroup/cpuset/lxc'; # nothing to do...

    my $all_cpus = PVE::CpuSet->new_from_cgroup('lxc', 'effective_cpus');
    my @allowed_cpus = $all_cpus->members();
    my $cpucount = scalar(@allowed_cpus);
    my $max_cpuid = $allowed_cpus[-1];

    # number of containers per CPU id, indexed by CPU id
    my @cpu_ctcount = (0) x ($max_cpuid+1);
    my @balanced_cts;

    # Write $newset to the cgroup(s) of container $vmid. $oldset is the
    # currently active set and is restored if the 'ns' sub-cgroup refuses
    # the new set.
    my $modify_cpuset = sub {
        my ($vmid, $oldset, $newset) = @_;

        # stay quiet while the previous attempts for this container failed
        syslog('info', "modified cpu set for lxc/$vmid: " . $newset->short_string())
            if !$rebalance_error_count->{$vmid};

        eval {
            if (!-d "/sys/fs/cgroup/cpuset/lxc/$vmid/ns") {
                # old style container
                $newset->write_to_cgroup("lxc/$vmid");
                $rebalance_error_count->{$vmid} = 0;
            } else {
                # allow all, so that we can set new cpuset in /ns
                $all_cpus->write_to_cgroup("lxc/$vmid");
                eval {
                    $newset->write_to_cgroup("lxc/$vmid/ns");
                };
                if (my $ns_err = $@) {
                    warn $ns_err if !$rebalance_error_count->{$vmid}++;
                    # restore original
                    $oldset->write_to_cgroup("lxc/$vmid");
                } else {
                    # also apply to container root cgroup
                    $newset->write_to_cgroup("lxc/$vmid");
                    $rebalance_error_count->{$vmid} = 0;
                }
            }
        };
        if (my $err = $@) {
            warn $err if !$rebalance_error_count->{$vmid}++;
        }
    };

    my $ctlist = PVE::LXC::config_list();

    for my $vmid (sort keys %$ctlist) {
        next if ! -d "/sys/fs/cgroup/cpuset/lxc/$vmid";

        my ($conf, $cpuset);
        eval {
            $conf = PVE::LXC::Config->load_config($vmid);
            $cpuset = PVE::CpuSet->new_from_cgroup("lxc/$vmid");
        };
        if (my $err = $@) {
            warn $err;
            next;
        }

        my @cpuset_members = $cpuset->members();

        if (!PVE::LXC::Config->has_lxc_entry($conf, 'lxc.cgroup.cpuset.cpus')) {

            my $cores = $conf->{cores} || $cpucount;
            $cores = $cpucount if $cores > $cpucount;

            # see if the number of cores was hot-reduced or
            # hasn't been enacted at all yet
            my $member_count = scalar(@cpuset_members);
            my $newset = PVE::CpuSet->new();
            if ($cores < $member_count) {
                # shrink: keep the first $cores members
                for my $idx (0 .. $cores - 1) {
                    $newset->insert($cpuset_members[$idx]);
                }
            } elsif ($cores > $member_count) {
                # grow: keep all members and add allowed CPUs until enough
                my $count = $newset->insert(@cpuset_members);
                for my $cpu (@allowed_cpus) {
                    $count += $newset->insert($cpu);
                    last if $count >= $cores;
                }
            } else {
                $newset->insert(@cpuset_members);
            }

            # Apply hot-plugged changes if any:
            if (!$newset->is_equal($cpuset)) {
                @cpuset_members = $newset->members();
                $modify_cpuset->($vmid, $cpuset, $newset);
            }

            # Note: no need to rebalance if we already use all cores
            if (defined($conf->{cores}) && ($cores != $cpucount)) {
                push @balanced_cts, [$vmid, $cores, $newset];
            }
        }

        for my $cpu (@cpuset_members) {
            $cpu_ctcount[$cpu]++ if $cpu <= $max_cpuid;
        }
    }

    # Return the CPU from $candidates that is used by at least two
    # containers fewer than the best choice so far, starting with $cpu
    # itself; returns $cpu when no candidate is that much better.
    my $find_best_cpu = sub {
        my ($candidates, $cpu) = @_;

        my $best_cpu = $cpu;
        my $best_cost = $cpu_ctcount[$cpu];

        for my $candidate (@$candidates) {
            my $cost = $cpu_ctcount[$candidate];
            if ($cost < ($best_cost -1)) {
                $best_cost = $cost;
                $best_cpu = $candidate;
            }
        }

        return $best_cpu;
    };

    for my $entry (@balanced_cts) {
        my ($vmid, $cores, $cpuset) = @$entry;

        my $newset = PVE::CpuSet->new();

        # allowed CPUs this container does not use yet
        my $rest = [ grep { !$cpuset->has($_) } @allowed_cpus ];

        my @members = $cpuset->members();
        for my $cpu (@members) {
            my $best = $find_best_cpu->($rest, $cpu);
            if ($best != $cpu) {
                # account for the move right away, so that the following
                # decisions see the updated usage
                $cpu_ctcount[$best]++;
                $cpu_ctcount[$cpu]--;
            }
            $newset->insert($best);
        }

        if (!$newset->is_equal($cpuset)) {
            $modify_cpuset->($vmid, $cpuset, $newset);
        }
    }
}
355
efd04666
DM
# Publish the status of all LXC containers: broadcast one RRD record
# "pve2.3-vm/<vmid>" per container and forward the status hash to the
# external metric plugins.
#
# $status_cfg - configuration handed through to PVE::ExtMetric::update_all()
sub update_lxc_status {
    my ($status_cfg) = @_;

    my $ctime = time();
    my $vmstatus = PVE::LXC::vmstatus();

    foreach my $vmid (keys %$vmstatus) {
        my $d = $vmstatus->{$vmid};
        my $template = $d->{template} || "0";

        # for containers that are not running the uptime is 0 and the
        # runtime counters are reported as unknown
        my ($uptime, $cpu, $mem, $netin, $netout, $diskread, $diskwrite) =
            $d->{status} eq 'running'
                ? @{$d}{qw(uptime cpu mem netin netout diskread diskwrite)}
                : (0, (undef) x 6);

        my $data = $generate_rrd_string->([
            $uptime, $d->{name}, $d->{status}, $template,
            $ctime, $d->{cpus}, $cpu,
            $d->{maxmem}, $mem,
            $d->{maxdisk}, $d->{disk},
            $netin, $netout,
            $diskread, $diskwrite,
        ]);
        PVE::Cluster::broadcast_rrd("pve2.3-vm/$vmid", $data);

        PVE::ExtMetric::update_all($status_cfg, 'lxc', $vmid, $d, $ctime, $nodename);
    }
}
384
# Publish total/used values of every active storage: broadcast one RRD
# record "pve2-storage/<nodename>/<storeid>" per storage and forward the
# storage info to the external metric plugins.
#
# $status_cfg - configuration handed through to PVE::ExtMetric::update_all()
sub update_storage_status {
    my ($status_cfg) = @_;

    my $storage_cfg = PVE::Storage::config();
    my $ctime = time();
    my $info = PVE::Storage::storage_info($storage_cfg);

    for my $storeid (keys %$info) {
        my $store = $info->{$storeid};

        # storages that are not active are skipped entirely
        next if !$store->{active};

        my $rrd_key = "pve2-storage/${nodename}/$storeid";
        my $data = $generate_rrd_string->([$ctime, $store->{total}, $store->{used}]);
        PVE::Cluster::broadcast_rrd($rrd_key, $data);

        PVE::ExtMetric::update_all($status_cfg, 'storage', $nodename, $storeid, $store, $ctime);
    }
}
404
5ea29d13
FG
# Rotate the authentication key whenever check_authkey(1) reports a false
# value for the current key.
sub rotate_authkeys {
    if (!PVE::AccessControl::check_authkey(1)) {
        PVE::AccessControl::rotate_authkey();
    }
}
408
a6dff455
TL
# Broadcast the Ceph related metadata of this node to the cluster.
#
# Does nothing when check_ceph_inited(1) reports a false value. Otherwise
# the Ceph services are broadcast and, if a local Ceph version could be
# determined, the version is published under two node KV keys:
#   "ceph-version"  - plain version string (legacy key)
#   "ceph-versions" - JSON with version string, version parts and build commit
sub update_ceph_metadata {
    return if !PVE::Ceph::Tools::check_ceph_inited(1); # nothing to do

    PVE::Ceph::Services::broadcast_ceph_services();

    my ($version, $buildcommit, $vers_parts) = PVE::Ceph::Tools::get_local_version(1);

    # without a local version there is nothing more to publish
    return if !$version;

    # FIXME: remove with 7.0 - for backward compat only
    PVE::Cluster::broadcast_node_kv("ceph-version", $version);

    my $node_versions = {
        version => {
            str => $version,
            parts => $vers_parts,
        },
        buildcommit => $buildcommit,
    };
    PVE::Cluster::broadcast_node_kv("ceph-versions", encode_json($node_versions));
}
433
a36565ba
AD
# Broadcast the SDN transport status of this node under the node KV key
# "sdn". Does nothing when the SDN module could not be loaded at startup.
sub update_sdn_status {

    return if !$have_sdn;

    # status() is called in list context; only its first result is used here
    my ($transport_status) = PVE::Network::SDN::status();

    # undef is broadcast when there is no transport status
    my $status;
    $status = encode_json($transport_status) if $transport_status;

    PVE::Cluster::broadcast_node_kv("sdn", $status);
}
443
efd04666
DM
# Run one complete status update cycle. Every step is executed in its own
# eval, so a failing step is logged with its own message and the remaining
# steps still run.
sub update_status {

    # update worker list. This is not really required and
    # we just call this to make sure that we have a correct
    # list in case of an unexpected crash.
    my $rpcenv = PVE::RPCEnvironment::get();

    eval {
        my $tlist = $rpcenv->active_workers();
        PVE::Cluster::broadcast_tasklist($tlist);
    };
    my $err = $@;
    syslog('err', $err) if $err;

    my $status_cfg = PVE::Cluster::cfs_read_file('status.cfg');

    # [ error message prefix, step ] - executed in exactly this order
    my @steps = (
        ['node status update error', sub { update_node_status($status_cfg) }],
        ['qemu status update error', sub { update_qemu_status($status_cfg) }],
        ['lxc status update error', sub { update_lxc_status($status_cfg) }],
        ['lxc cpuset rebalance error', sub { rebalance_lxc_containers() }],
        ['storage status update error', sub { update_storage_status($status_cfg) }],
        ['lxc console cleanup error', sub { remove_stale_lxc_consoles() }],
        ['authkey rotation error', sub { rotate_authkeys() }],
        ['ceph metadata update error', sub { update_ceph_metadata() }],
        ['sdn status update error', sub { update_sdn_status() }],
    );

    for my $step (@steps) {
        my ($errprefix, $code) = @$step;

        eval {
            $code->();
        };
        $err = $@;
        syslog('err', "$errprefix: $err") if $err;
    }
}
515
# point in time (epoch seconds) at which the next update cycle is due
my $next_update = 0;

# do not update directly after startup, because install scripts
# have a problem with that
my $cycle = 0;

# seconds between two update cycles
my $updatetime = 10;

# resident memory size used as baseline for the memory growth check
my $initial_memory_usage;

# Daemon main loop: run a status update every $updatetime seconds (skipping
# the very first round), restart the daemon when its resident memory grew by
# more than 5 MiB over the baseline, and restart it when hup() requested so.
sub run {
    my ($self) = @_;

    while (1) { # forever

        $next_update = time() + $updatetime;

        if ($cycle) {
            my ($start_sec, $start_usec) = gettimeofday();

            eval {
                # syslog('info', "start status update");
                PVE::Cluster::cfs_update();
                update_status();
            };
            if (my $err = $@) {
                syslog('err', "status update error: $err");
            }

            my ($end_sec, $end_usec) = gettimeofday();
            my $elapsed = ($end_sec - $start_sec) + ($end_usec - $start_usec)/1000000;

            # only slow update cycles are worth a log entry
            if ($elapsed > 5) {
                syslog('info', sprintf("status update time (%.3f seconds)", $elapsed));
            }
        }

        $cycle++;

        my $mem = PVE::ProcFSTools::read_memory_usage();
        my $resident = $mem->{resident};

        if (!defined($initial_memory_usage) || ($cycle < 10)) {
            # the baseline keeps following the current usage during the
            # first cycles
            $initial_memory_usage = $resident;
        } else {
            my $diff = $resident - $initial_memory_usage;
            if ($diff > 5*1024*1024) {
                syslog ('info', "restarting server after $cycle cycles to " .
                    "reduce memory usage (free $resident ($diff) bytes)");
                $self->restart_daemon();
            }
        }

        # wait for the next cycle in one second steps; the step counter
        # protects against time wrap
        for (
            my $wcount = 0;
            (time() < $next_update) && ($wcount < $updatetime) && !$restart_request;
            $wcount++
        ) {
            sleep(1);
        }

        $self->restart_daemon() if $restart_request;
    }
}
575
# make the standard daemon commands available on the daemon instance
$daemon->register_start_command();
$daemon->register_restart_command(1);
$daemon->register_stop_command();
$daemon->register_status_command();

# Command definitions: start/restart/stop share the same shape; 'status'
# additionally gets a callback that prints its argument followed by a newline.
our $cmddef = {
    (map { ($_ => [ __PACKAGE__, $_, [] ]) } qw(start restart stop)),
    status => [
        __PACKAGE__, 'status', [], undef,
        sub {
            my $text = shift;
            print $text . "\n";
        },
    ],
};

1;
589
efd04666
DM
590
591
592
593