# Source: git.proxmox.com Git - pve-manager.git/blame - PVE/Service/pvestatd.pm
# Commit subject: statd: refactor out updating ceph metadata
# Path: [pve-manager.git] / PVE / Service / pvestatd.pm
# Blame columns: Commit | Line | Data
efd04666
DM
1package PVE::Service::pvestatd;
2
3use strict;
4use warnings;
5
6use PVE::SafeSyslog;
7use PVE::Daemon;
8
fea39196
DC
9use JSON;
10
efd04666
DM
11use Time::HiRes qw (gettimeofday);
12use PVE::Tools qw(dir_glob_foreach file_read_firstline);
13use PVE::ProcFSTools;
41db757b 14use PVE::CpuSet;
efd04666
DM
15use Filesys::Df;
16use PVE::INotify;
0fcced16 17use PVE::Network;
efd04666
DM
18use PVE::Cluster qw(cfs_read_file);
19use PVE::Storage;
20use PVE::QemuServer;
21use PVE::LXC;
41db757b 22use PVE::LXC::Config;
efd04666
DM
23use PVE::RPCEnvironment;
24use PVE::API2::Subscription;
25use PVE::AutoBalloon;
5ea29d13 26use PVE::AccessControl;
fea39196
DC
27use PVE::Ceph::Services;
28use PVE::Ceph::Tools;
efd04666
DM
29
30use PVE::Status::Plugin;
31use PVE::Status::Graphite;
58541b94 32use PVE::Status::InfluxDB;
efd04666
DM
33
# Register the status plugin backends, then initialize the plugin base class.
for my $plugin_class (qw(PVE::Status::Graphite PVE::Status::InfluxDB)) {
    $plugin_class->register();
}
PVE::Status::Plugin->init();
37
38use base qw(PVE::Daemon);
39
a36565ba
AD
# True (1) when the optional SDN API module could be loaded, undef otherwise.
my $have_sdn = eval {
    require PVE::API2::Network::SDN;
    1;
};

# Set from the daemon's debug flag in init().
my $opt_debug;
# Set by hup(); checked by the main loop in run() to trigger a restart.
my $restart_request;

my $nodename = PVE::INotify::nodename();

my $cmdline = [$0, @ARGV];

my %daemon_options = (
    restart_on_error => 5,
    stop_wait_time => 5,
);
my $daemon = __PACKAGE__->new('pvestatd', $cmdline, %daemon_options);
55
# Daemon init hook: remember the debug flag of the daemon object and trigger
# one update of the cluster file system state.
sub init {
    my $self = shift;

    $opt_debug = $self->{debug};

    PVE::Cluster::cfs_update();
}
63
# Daemon shutdown hook: log the shutdown, reap all children that have already
# exited and then leave the daemon with exit code 0.
sub shutdown {
    my $self = shift;

    syslog('info', "server closing");

    # wait for children (non-blocking: stops once no finished child is left)
    while (waitpid(-1, POSIX::WNOHANG()) > 0) {
        # nothing to do, we only collect the exit status
    }

    $self->exit_daemon(0);
}
74
# Daemon HUP hook: only flag the restart request; the main loop in run()
# performs the actual restart.
sub hup {
    my $self = shift;

    $restart_request = 1;
}
80
50786956
DM
# Build an RRD update string from an array reference: the values are joined
# with ':' and every undefined entry is replaced by 'U'.
my $generate_rrd_string = sub {
    my ($data) = @_;

    my @fields = map { defined($_) ? $_ : 'U' } @$data;

    return join(':', @fields);
};
86
efd04666
DM
# Collect host level metrics (uptime, load average, CPU, memory, swap, root
# file system usage and physical NIC traffic), broadcast them as RRD data for
# this node and hand the detailed values to the status plugins.
#
# $status_cfg - status plugin configuration; passed through unchanged to
#               PVE::Status::Plugin::update_all().
sub update_node_status {
    my ($status_cfg) = @_;

    my ($uptime) = PVE::ProcFSTools::read_proc_uptime();
    my ($avg1, $avg5, $avg15) = PVE::ProcFSTools::read_loadavg();

    my $stat = PVE::ProcFSTools::read_proc_stat();
    my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();
    my $maxcpu = $cpuinfo->{cpus};

    my $subinfo = PVE::INotify::read_file('subscription');
    my $sublevel = $subinfo->{level} || '';

    # traffic from/to physical interface cards
    my $netdev = PVE::ProcFSTools::read_proc_net_dev();
    my $netin = 0;
    my $netout = 0;
    foreach my $dev (keys %$netdev) {
        next if $dev !~ /^$PVE::Network::PHYSICAL_NIC_RE$/;

        my $counters = $netdev->{$dev};
        $netin += $counters->{receive};
        $netout += $counters->{transmit};
    }

    my $meminfo = PVE::ProcFSTools::read_meminfo();

    my $dinfo = df('/', 1); # output is bytes
    # everything not free is considered to be used
    my $dused = $dinfo->{blocks} - $dinfo->{bfree};

    my $ctime = time();

    my @rrd_fields = (
        $uptime, $sublevel, $ctime, $avg1, $maxcpu,
        $stat->{cpu}, $stat->{wait},
        $meminfo->{memtotal}, $meminfo->{memused},
        $meminfo->{swaptotal}, $meminfo->{swapused},
        $dinfo->{blocks}, $dused,
        $netin, $netout,
    );
    my $data = $generate_rrd_string->(\@rrd_fields);
    PVE::Cluster::broadcast_rrd("pve2-node/$nodename", $data);

    my $node_metric = {
        uptime => $uptime,
        cpustat => $stat,
        memory => $meminfo,
        blockstat => $dinfo,
        nics => $netdev,
    };
    # the load averages and the CPU count are reported as part of 'cpustat'
    @{ $node_metric->{cpustat} }{qw(avg1 avg5 avg15)} = ($avg1, $avg5, $avg15);
    $node_metric->{cpustat}->{cpus} = $maxcpu;

    PVE::Status::Plugin::update_all($status_cfg, 'node', $nodename, $node_metric, $ctime);
}
136
# Adjust the balloon targets of the guests so that the host uses roughly 80%
# of its memory. The per-guest targets are computed by
# PVE::AutoBalloon::compute_alg1() and applied through the QEMU monitor.
#
# $vmstatus - hash reference keyed by VMID; the 'balloon' entry of each guest
#             is read as its current balloon value.
sub auto_balloning {
    my ($vmstatus) = @_;

    # debug output only, printf-style arguments
    my $log = sub {
        printf @_ if $opt_debug;
    };

    my $hostmeminfo = PVE::ProcFSTools::read_meminfo();
    # NOTE: to debug, run 'pvestatd -d' and set memtotal here
    #$hostmeminfo->{memtotal} = int(2*1024*1024*1024/0.8); # you can set this to test

    my $hostfreemem = $hostmeminfo->{memtotal} - $hostmeminfo->{memused};

    # try to use ~80% host memory; goal is the change amount required to achieve that
    my $goal = int($hostmeminfo->{memtotal} * 0.8 - $hostmeminfo->{memused});
    $log->("host goal: $goal free: $hostfreemem total: $hostmeminfo->{memtotal}\n");

    my $maxchange = 100*1024*1024;
    my $res = PVE::AutoBalloon::compute_alg1($vmstatus, $goal, $maxchange);

    foreach my $vmid (sort keys %$res) {
        my $target = int($res->{$vmid});
        my $current = int($vmstatus->{$vmid}->{balloon});

        if ($target == $current) {
            next; # no need to change
        }

        $log->("BALLOON $vmid to $target (%d)\n", $target - $current);

        # a failure for one guest must not stop the others
        eval {
            PVE::QemuServer::vm_mon_cmd($vmid, "balloon", value => $target);
        };
        warn $@ if $@;
    }
}
164
# Query the status of all QEMU guests, run the auto-ballooning pass and then
# broadcast one RRD record per guest and forward the guest data to the status
# plugins.
#
# $status_cfg - status plugin configuration; passed through unchanged to
#               PVE::Status::Plugin::update_all().
sub update_qemu_status {
    my ($status_cfg) = @_;

    my $ctime = time();

    my $vmstatus = PVE::QemuServer::vmstatus(undef, 1);

    # ballooning problems are logged only, the status update continues
    eval {
        auto_balloning($vmstatus);
    };
    syslog('err', "auto ballooning error: $@") if $@;

    for my $vmid (keys %$vmstatus) {
        my $d = $vmstatus->{$vmid};

        my $status = $d->{qmpstatus} || $d->{status} || 'stopped';
        my $template = $d->{template} || "0";

        my @fields;
        if ($d->{pid}) { # running
            @fields = (
                $d->{uptime}, $d->{name}, $status, $template, $ctime,
                $d->{cpus}, $d->{cpu},
                $d->{maxmem}, $d->{mem},
                $d->{maxdisk}, $d->{disk},
                $d->{netin}, $d->{netout},
                $d->{diskread}, $d->{diskwrite},
            );
        } else {
            # not running: uptime is 0 and the usage counters are reported
            # as unknown
            @fields = (
                0, $d->{name}, $status, $template, $ctime,
                $d->{cpus}, undef,
                $d->{maxmem}, undef,
                $d->{maxdisk}, $d->{disk},
                undef, undef,
                undef, undef,
            );
        }

        my $data = $generate_rrd_string->(\@fields);
        PVE::Cluster::broadcast_rrd("pve2.3-vm/$vmid", $data);

        PVE::Status::Plugin::update_all($status_cfg, 'qemu', $vmid, $d, $ctime, $nodename);
    }
}
195
# Kill lxc-console processes that belong to a container id which is no longer
# present in the LXC status list.
sub remove_stale_lxc_consoles {

    my $vmstatus = PVE::LXC::vmstatus();
    my $pidhash = PVE::LXC::find_lxc_console_pids();

    my @stale_vmids = grep { !defined($vmstatus->{$_}) } keys %$pidhash;

    for my $vmid (@stale_vmids) {
        syslog('info', "remove stale lxc-console for CT $vmid");
        for my $pid (@{$pidhash->{$vmid}}) {
            kill(9, $pid);
        }
    }
}
209
b3f1adb2
DM
# Number of consecutive failed cpuset updates per container id. While the
# count is non-zero the "modified cpu set" log line and the warning for that
# container are suppressed; a successful write resets it to 0.
my $rebalance_error_count = {};

# Distribute the CPUs of the 'lxc' cpuset cgroup among the containers.
#
# First pass: for every container without an explicit
# 'lxc.cgroup.cpuset.cpus' entry, shrink or grow its cpuset to the configured
# number of cores and count how many containers use each CPU.
# Second pass: for containers with a 'cores' setting below the number of
# allowed CPUs, move them from a CPU to a less used one when that CPU is used
# by at least two containers fewer.
sub rebalance_lxc_containers {

    return if !-d '/sys/fs/cgroup/cpuset/lxc'; # nothing to do...

    my $all_cpus = PVE::CpuSet->new_from_cgroup('lxc', 'effective_cpus');
    my @allowed_cpus = $all_cpus->members();
    my $cpucount = scalar(@allowed_cpus);
    my $max_cpuid = $allowed_cpus[-1];

    # number of containers using each CPU, indexed by CPU id
    my @cpu_ctcount = (0) x ($max_cpuid+1);
    # entries of the form [$vmid, $cores, $cpuset] for the second pass
    my @balanced_cts;

    # Write $newset for container $vmid; $cpuset is the previous set and is
    # restored when the write to the '/ns' cgroup fails.
    my $modify_cpuset = sub {
        my ($vmid, $cpuset, $newset) = @_;

        syslog('info', "modified cpu set for lxc/$vmid: " . $newset->short_string())
            if !$rebalance_error_count->{$vmid};

        eval {
            if (!-d "/sys/fs/cgroup/cpuset/lxc/$vmid/ns") {
                # old style container
                $newset->write_to_cgroup("lxc/$vmid");
                $rebalance_error_count->{$vmid} = 0;
            } else {
                # allow all, so that we can set new cpuset in /ns
                $all_cpus->write_to_cgroup("lxc/$vmid");
                eval {
                    $newset->write_to_cgroup("lxc/$vmid/ns");
                };
                if (my $ns_err = $@) {
                    # warn about the first failure in a row only
                    my $failed_before = $rebalance_error_count->{$vmid}++;
                    warn $ns_err if !$failed_before;
                    # restore original
                    $cpuset->write_to_cgroup("lxc/$vmid");
                } else {
                    # also apply to container root cgroup
                    $newset->write_to_cgroup("lxc/$vmid");
                    $rebalance_error_count->{$vmid} = 0;
                }
            }
        };
        if (my $err = $@) {
            my $failed_before = $rebalance_error_count->{$vmid}++;
            warn $err if !$failed_before;
        }
    };

    my $ctlist = PVE::LXC::config_list();

    for my $vmid (sort keys %$ctlist) {
        next if ! -d "/sys/fs/cgroup/cpuset/lxc/$vmid";

        my ($conf, $cpuset);
        eval {
            $conf = PVE::LXC::Config->load_config($vmid);
            $cpuset = PVE::CpuSet->new_from_cgroup("lxc/$vmid");
        };
        if (my $err = $@) {
            warn $err;
            next;
        }

        my @cpuset_members = $cpuset->members();

        if (!PVE::LXC::Config->has_lxc_entry($conf, 'lxc.cgroup.cpuset.cpus')) {

            my $cores = $conf->{cores} || $cpucount;
            $cores = $cpucount if $cores > $cpucount;

            my $member_count = scalar(@cpuset_members);

            # see if the number of cores was hot-reduced or
            # hasn't been enacted at all yet
            my $newset = PVE::CpuSet->new();
            if ($cores < $member_count) {
                # keep only the first $cores members
                my $i = 0;
                while ($i < $cores) {
                    $newset->insert($cpuset_members[$i]);
                    $i++;
                }
            } elsif ($cores > $member_count) {
                # keep the current members and add allowed CPUs until the
                # requested number of cores is reached
                my $count = $newset->insert(@cpuset_members);
                for my $cpu (@allowed_cpus) {
                    $count += $newset->insert($cpu);
                    last if $count >= $cores;
                }
            } else {
                $newset->insert(@cpuset_members);
            }

            # Apply hot-plugged changes if any:
            if (!$newset->is_equal($cpuset)) {
                @cpuset_members = $newset->members();
                $modify_cpuset->($vmid, $cpuset, $newset);
            }

            # Note: no need to rebalance if we already use all cores
            if (defined($conf->{cores}) && ($cores != $cpucount)) {
                push @balanced_cts, [$vmid, $cores, $newset];
            }
        }

        # members above the highest allowed CPU id are not counted
        for my $cpu (@cpuset_members) {
            next if !($cpu <= $max_cpuid);
            $cpu_ctcount[$cpu]++;
        }
    }

    # Return the CPU from $cpulist that is used by at least two containers
    # fewer than the best choice so far, starting with $cpu itself.
    my $find_best_cpu = sub {
        my ($cpulist, $cpu) = @_;

        my $best_cost = $cpu_ctcount[$cpu];
        my $best_cpu = $cpu;

        for my $candidate (@$cpulist) {
            my $cost = $cpu_ctcount[$candidate];
            if ($cost < ($best_cost -1)) {
                $best_cost = $cost;
                $best_cpu = $candidate;
            }
        }

        return $best_cpu;
    };

    for my $bct (@balanced_cts) {
        my ($vmid, $cores, $cpuset) = @$bct;

        my $newset = PVE::CpuSet->new();

        # allowed CPUs the container does not use yet
        my $rest = [ grep { !$cpuset->has($_) } @allowed_cpus ];

        for my $cpu ($cpuset->members()) {
            my $best = $find_best_cpu->($rest, $cpu);
            if ($best != $cpu) {
                # account for the move right away so later decisions see it
                $cpu_ctcount[$best]++;
                $cpu_ctcount[$cpu]--;
            }
            $newset->insert($best);
        }

        $modify_cpuset->($vmid, $cpuset, $newset)
            if !$newset->is_equal($cpuset);
    }
}
360
efd04666
DM
# Query the status of all LXC containers, broadcast one RRD record per
# container and forward the container data to the status plugins.
#
# $status_cfg - status plugin configuration; passed through unchanged to
#               PVE::Status::Plugin::update_all().
sub update_lxc_status {
    my ($status_cfg) = @_;

    my $ctime = time();
    my $vmstatus = PVE::LXC::vmstatus();

    for my $vmid (keys %$vmstatus) {
        my $d = $vmstatus->{$vmid};
        my $template = $d->{template} || "0";

        my @fields;
        if ($d->{status} eq 'running') { # running
            @fields = (
                $d->{uptime}, $d->{name}, $d->{status}, $template, $ctime,
                $d->{cpus}, $d->{cpu},
                $d->{maxmem}, $d->{mem},
                $d->{maxdisk}, $d->{disk},
                $d->{netin}, $d->{netout},
                $d->{diskread}, $d->{diskwrite},
            );
        } else {
            # not running: uptime is 0 and the usage counters are reported
            # as unknown
            @fields = (
                0, $d->{name}, $d->{status}, $template, $ctime,
                $d->{cpus}, undef,
                $d->{maxmem}, undef,
                $d->{maxdisk}, $d->{disk},
                undef, undef,
                undef, undef,
            );
        }

        my $data = $generate_rrd_string->(\@fields);
        PVE::Cluster::broadcast_rrd("pve2.3-vm/$vmid", $data);

        PVE::Status::Plugin::update_all($status_cfg, 'lxc', $vmid, $d, $ctime, $nodename);
    }
}
389
# Broadcast total/used RRD data for every active storage of this node and
# forward the storage data to the status plugins.
#
# $status_cfg - status plugin configuration; passed through unchanged to
#               PVE::Status::Plugin::update_all().
sub update_storage_status {
    my ($status_cfg) = @_;

    my $cfg = PVE::Storage::config();
    my $ctime = time();
    my $info = PVE::Storage::storage_info($cfg);

    for my $storeid (keys %$info) {
        my $d = $info->{$storeid};

        # inactive storages are skipped
        if (!$d->{active}) {
            next;
        }

        my @fields = ($ctime, $d->{total}, $d->{used});
        my $data = $generate_rrd_string->(\@fields);

        my $key = "pve2-storage/${nodename}/$storeid";
        PVE::Cluster::broadcast_rrd($key, $data);

        PVE::Status::Plugin::update_all($status_cfg, 'storage', $nodename, $storeid, $d, $ctime);
    }
}
409
5ea29d13
FG
# Rotate the authentication key whenever PVE::AccessControl::check_authkey(1)
# reports a false value.
sub rotate_authkeys {
    if (!PVE::AccessControl::check_authkey(1)) {
        PVE::AccessControl::rotate_authkey();
    }
}
413
a6dff455
TL
# Broadcast the Ceph services and, when available, the local Ceph version as
# the node key/value entry "ceph-version". Does nothing when
# PVE::Ceph::Tools::check_ceph_inited(1) returns false.
sub update_ceph_metadata {
    return unless PVE::Ceph::Tools::check_ceph_inited(1); # nothing to do

    PVE::Ceph::Services::broadcast_ceph_services();

    my ($version) = PVE::Ceph::Tools::get_local_version(1);

    PVE::Cluster::broadcast_node_kv("ceph-version", $version) if $version;
}
424
a36565ba
AD
# Broadcast the SDN transport status as the node key/value entry "sdn". The
# status is JSON encoded; undef is broadcast when no transport status is
# returned. Does nothing when the SDN module could not be loaded.
sub update_sdn_status {

    if ($have_sdn) {
        my ($transport_status, $vnet_status) = PVE::Network::SDN::status();

        my $status;
        if ($transport_status) {
            $status = encode_json($transport_status);
        } else {
            $status = undef;
        }

        PVE::Cluster::broadcast_node_kv("sdn", $status);
    }
}
434
efd04666
DM
# Run one complete status update cycle. Every step runs in its own eval, so
# a failure is written to syslog and the remaining steps still run.
sub update_status {

    # update worker list. This is not really required and
    # we just call this to make sure that we have a correct
    # list in case of an unexpected crash.
    my $rpcenv = PVE::RPCEnvironment::get();

    eval {
        my $tlist = $rpcenv->active_workers();
        PVE::Cluster::broadcast_tasklist($tlist);
    };
    if (my $err = $@) {
        syslog('err', $err);
    }

    my $status_cfg = PVE::Cluster::cfs_read_file('status.cfg');

    eval { update_node_status($status_cfg); };
    if (my $err = $@) {
        syslog('err', "node status update error: $err");
    }

    eval { update_qemu_status($status_cfg); };
    if (my $err = $@) {
        syslog('err', "qemu status update error: $err");
    }

    eval { update_lxc_status($status_cfg); };
    if (my $err = $@) {
        syslog('err', "lxc status update error: $err");
    }

    eval { rebalance_lxc_containers(); };
    if (my $err = $@) {
        syslog('err', "lxc cpuset rebalance error: $err");
    }

    eval { update_storage_status($status_cfg); };
    if (my $err = $@) {
        syslog('err', "storage status update error: $err");
    }

    eval { remove_stale_lxc_consoles(); };
    if (my $err = $@) {
        syslog('err', "lxc console cleanup error: $err");
    }

    eval { rotate_authkeys(); };
    if (my $err = $@) {
        syslog('err', "authkey rotation error: $err");
    }

    eval { update_ceph_metadata(); };
    if (my $err = $@) {
        syslog('err', "error getting ceph services: $err");
    }

    eval { update_sdn_status(); };
    if (my $err = $@) {
        syslog('err', "sdn status update error: $err");
    }
}
506
# Time at which the next update cycle is due.
my $next_update = 0;

# do not update directly after startup, because install scripts
# have a problem with that
my $cycle = 0;
# Seconds between two update cycles.
my $updatetime = 10;

# Resident memory baseline, refreshed during the first cycles (see run()).
my $initial_memory_usage;

# Main loop of the daemon: run a status update every $updatetime seconds
# (skipping the very first cycle), restart the daemon when the resident
# memory grew by more than 5 MiB over the baseline, and restart on request
# (see hup()).
sub run {
    my ($self) = @_;

    while (1) { # forever

        $next_update = time() + $updatetime;

        if ($cycle) {
            my ($ccsec, $cusec) = gettimeofday ();

            eval {
                # syslog('info', "start status update");
                PVE::Cluster::cfs_update();
                update_status();
            };
            if (my $err = $@) {
                syslog('err', "status update error: $err");
            }

            my ($ccsec_end, $cusec_end) = gettimeofday ();
            my $cptime = ($ccsec_end-$ccsec) + ($cusec_end - $cusec)/1000000;

            # only slow updates are worth a log entry
            if ($cptime > 5) {
                syslog('info', sprintf("status update time (%.3f seconds)", $cptime));
            }
        }

        $cycle++;

        my $mem = PVE::ProcFSTools::read_memory_usage();

        if (!defined($initial_memory_usage) || ($cycle < 10)) {
            # still collecting the baseline
            $initial_memory_usage = $mem->{resident};
        } else {
            my $diff = $mem->{resident} - $initial_memory_usage;
            if ($diff > 5*1024*1024) {
                syslog ('info', "restarting server after $cycle cycles to " .
                    "reduce memory usage (free $mem->{resident} ($diff) bytes)");
                $self->restart_daemon();
            }
        }

        # sleep in one second steps until the next update is due, so that a
        # restart request is noticed quickly
        for (
            my $wcount = 0;
            (time() < $next_update) &&
                ($wcount < $updatetime) && # protect against time wrap
                !$restart_request;
            $wcount++
        ) {
            sleep (1);
        }

        $self->restart_daemon() if $restart_request;
    }
}
566
# Register the daemon control commands on the daemon object.
$daemon->register_start_command();
$daemon->register_restart_command(1);
$daemon->register_stop_command();
$daemon->register_status_command();

# Command definitions: each entry maps a command name to
# [ package, method name, argument list, ... ]; 'status' additionally prints
# its result followed by a newline.
our $cmddef = {
    (map { $_ => [ __PACKAGE__, $_, [] ] } qw(start restart stop)),
    status => [
        __PACKAGE__, 'status', [], undef,
        sub { print shift . "\n"; },
    ],
};
578
efd04666
DM
5791;
580
efd04666
DM
581
582
583
584