]> git.proxmox.com Git - pve-manager.git/blame - PVE/Service/pvestatd.pm
rebalance_lxc_containers: avoid repeated warnings if rebalance fails
[pve-manager.git] / PVE / Service / pvestatd.pm
CommitLineData
efd04666
DM
1package PVE::Service::pvestatd;
2
3use strict;
4use warnings;
5
6use PVE::SafeSyslog;
7use PVE::Daemon;
8
9use Time::HiRes qw (gettimeofday);
10use PVE::Tools qw(dir_glob_foreach file_read_firstline);
11use PVE::ProcFSTools;
41db757b 12use PVE::CpuSet;
efd04666
DM
13use Filesys::Df;
14use PVE::INotify;
15use PVE::Cluster qw(cfs_read_file);
16use PVE::Storage;
17use PVE::QemuServer;
18use PVE::LXC;
41db757b 19use PVE::LXC::Config;
efd04666
DM
20use PVE::RPCEnvironment;
21use PVE::API2::Subscription;
22use PVE::AutoBalloon;
23
24use PVE::Status::Plugin;
25use PVE::Status::Graphite;
58541b94 26use PVE::Status::InfluxDB;
efd04666
DM
27
# register all external metric server plugins, then let the plugin base
# class build its lookup/config machinery
PVE::Status::Graphite->register();
PVE::Status::InfluxDB->register();
PVE::Status::Plugin->init();

use base qw(PVE::Daemon);

my $opt_debug;        # set from --debug; enables auto_balloning() trace output
my $restart_request;  # set by hup(); checked in run()'s sleep loop

my $nodename = PVE::INotify::nodename();

# remember our own command line so the daemon can re-exec itself on restart
my $cmdline = [$0, @ARGV];

my %daemon_options = (restart_on_error => 5, stop_wait_time => 5);
my $daemon = __PACKAGE__->new('pvestatd', $cmdline, %daemon_options);
43
# Daemon startup hook: cache the debug flag and prime the cluster
# filesystem state before the main loop starts.
sub init {
    my ($self) = @_;

    $opt_debug = $self->{debug}; # remembered for auto_balloning() logging

    PVE::Cluster::cfs_update();
}
51
# Daemon shutdown hook: log, reap any remaining child processes, and
# exit with status 0.
sub shutdown {
    my ($self) = @_;

    syslog('info', "server closing");

    # non-blocking reap of all exited children
    while (waitpid(-1, POSIX::WNOHANG()) > 0) { }

    $self->exit_daemon(0);
}
62
# SIGHUP handler: only sets a flag; the main loop in run() notices
# $restart_request and performs the actual daemon restart.
sub hup {
    my ($self) = @_;

    $restart_request = 1;
}
68
# Collect node-level statistics (load, cpu, memory, network, root fs),
# broadcast them via the cluster RRD channel, and forward them to every
# enabled external metric plugin.
#
# $status_cfg - parsed status.cfg (plugin instances under ->{ids})
sub update_node_status {
    my ($status_cfg) = @_;

    my ($avg1, $avg5, $avg15) = PVE::ProcFSTools::read_loadavg();

    my $stat = PVE::ProcFSTools::read_proc_stat();

    my $netdev = PVE::ProcFSTools::read_proc_net_dev();

    my ($uptime) = PVE::ProcFSTools::read_proc_uptime();

    my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();

    my $maxcpu = $cpuinfo->{cpus};

    my $subinfo = PVE::INotify::read_file('subscription');
    my $sublevel = $subinfo->{level} || '';

    # traffic from/to physical interface cards
    my $netin = 0;
    my $netout = 0;
    foreach my $dev (keys %$netdev) {
	next if $dev !~ m/^eth\d+$/;
	$netin += $netdev->{$dev}->{receive};
	$netout += $netdev->{$dev}->{transmit};
    }

    my $meminfo = PVE::ProcFSTools::read_meminfo();

    my $dinfo = df('/', 1); # output is bytes

    my $ctime = time();

    # everything not free is considered to be used
    my $dused = $dinfo->{blocks} - $dinfo->{bfree};

    my $data = "$uptime:$sublevel:$ctime:$avg1:$maxcpu:$stat->{cpu}:$stat->{wait}:" .
	"$meminfo->{memtotal}:$meminfo->{memused}:" .
	"$meminfo->{swaptotal}:$meminfo->{swapused}:" .
	"$dinfo->{blocks}:$dused:$netin:$netout";

    PVE::Cluster::broadcast_rrd("pve2-node/$nodename", $data);

    # Build the plugin payload once, outside the loop. Use a shallow copy
    # of $stat so adding the load averages does not mutate the hash
    # returned by read_proc_stat() (the old code aliased it and re-wrote
    # the avg fields on every plugin iteration).
    my $d = {};
    $d->{uptime} = $uptime;
    $d->{cpustat} = { %$stat };
    $d->{cpustat}->{avg1} = $avg1;
    $d->{cpustat}->{avg5} = $avg5;
    $d->{cpustat}->{avg15} = $avg15;
    $d->{cpustat}->{cpus} = $maxcpu;
    $d->{memory} = $meminfo;
    $d->{blockstat} = $dinfo;
    $d->{nics} = $netdev;

    foreach my $id (keys %{$status_cfg->{ids}}) {
	my $plugin_config = $status_cfg->{ids}->{$id};
	next if $plugin_config->{disable};
	my $plugin = PVE::Status::Plugin->lookup($plugin_config->{type});

	$plugin->update_node_status($plugin_config, $nodename, $d, $ctime);
    }
}
131
# Automatic memory ballooning: aim for ~80% host memory usage and ask
# PVE::AutoBalloon to distribute the difference across the running VMs,
# then push the computed balloon targets to each VM via the QEMU monitor.
# (Note: the sub name keeps its historic misspelling - it is part of the
# module interface.)
sub auto_balloning {
    my ($vmstatus) = @_;

    # trace output is only emitted when pvestatd runs with -d
    my $log = sub { print @_ if $opt_debug; };

    my $hostmeminfo = PVE::ProcFSTools::read_meminfo();

    # to debug, run 'pvestatd -d' and set memtotal here
    #$hostmeminfo->{memtotal} = int(2*1024*1024*1024/0.8); # you can set this to test

    my $hostfreemem = $hostmeminfo->{memtotal} - $hostmeminfo->{memused};

    # we try to use about 80% host memory
    # goal: we want to change memory usage by this amount (positive or negative)
    my $goal = int($hostmeminfo->{memtotal}*0.8 - $hostmeminfo->{memused});

    my $maxchange = 100*1024*1024; # cap per-round change at 100 MiB
    my $res = PVE::AutoBalloon::compute_alg1($vmstatus, $goal, $maxchange);

    $log->("host goal: $goal free: $hostfreemem total: $hostmeminfo->{memtotal}\n");

    foreach my $vmid (keys %$vmstatus) {
	my $target = $res->{$vmid} or next;
	my $diff = int($target - $vmstatus->{$vmid}->{balloon});
	next if !$diff; # already at the target size

	$log->("BALLOON $vmid to $target ($diff)\n");
	eval {
	    PVE::QemuServer::vm_mon_cmd($vmid, "balloon",
					value => int($target));
	};
	warn $@ if $@;
    }
}
171
# Gather the status of all QEMU VMs, run the auto-ballooning pass, then
# broadcast one RRD record per VM and feed every enabled metric plugin.
#
# $status_cfg - parsed status.cfg (plugin instances under ->{ids})
sub update_qemu_status {
    my ($status_cfg) = @_;

    my $ctime = time();
    my $vmstatus = PVE::QemuServer::vmstatus(undef, 1);

    eval { auto_balloning($vmstatus); };
    syslog('err', "auto ballooning error: $@") if $@;

    for my $vmid (keys %$vmstatus) {
	my $d = $vmstatus->{$vmid};
	my $status = $d->{qmpstatus} || $d->{status} || 'stopped';
	my $template = $d->{template} ? $d->{template} : "0";

	# RRD record layout differs for running vs. stopped guests:
	# stopped guests have no runtime counters, so those fields stay empty
	my $data;
	if ($d->{pid}) { # running
	    $data = "$d->{uptime}:$d->{name}:$status:$template:" .
		"$ctime:$d->{cpus}:$d->{cpu}:" .
		"$d->{maxmem}:$d->{mem}:" .
		"$d->{maxdisk}:$d->{disk}:" .
		"$d->{netin}:$d->{netout}:" .
		"$d->{diskread}:$d->{diskwrite}";
	} else {
	    $data = "0:$d->{name}:$status:$template:$ctime:$d->{cpus}::" .
		"$d->{maxmem}::" .
		"$d->{maxdisk}:$d->{disk}:" .
		":::";
	}
	PVE::Cluster::broadcast_rrd("pve2.3-vm/$vmid", $data);

	for my $id (keys %{$status_cfg->{ids}}) {
	    my $plugin_config = $status_cfg->{ids}->{$id};
	    next if $plugin_config->{disable};
	    PVE::Status::Plugin->lookup($plugin_config->{type})
		->update_qemu_status($plugin_config, $vmid, $d, $ctime);
	}
    }
}
210
# Kill lxc-console helper processes that belong to containers which no
# longer exist on this node.
sub remove_stale_lxc_consoles {

    my $vmstatus = PVE::LXC::vmstatus();
    my $console_pids = PVE::LXC::find_lxc_console_pids();

    for my $vmid (keys %$console_pids) {
	next if defined($vmstatus->{$vmid}); # container still present
	syslog('info', "remove stale lxc-console for CT $vmid");
	kill(9, $_) for @{$console_pids->{$vmid}};
    }
}
224
# Per-container error counter used to suppress repeated syslog/warn
# output when a cpuset rebalance keeps failing for the same container.
# Reset to 0 on the next successful cpuset update.
my $rebalance_error_count = {};

# Distribute the allowed host CPUs across all LXC containers that have a
# 'cores' limit, trying to even out how many containers share each CPU.
# Containers with an explicit lxc.cgroup.cpuset.cpus entry are left alone.
# Works directly on the cpuset cgroup hierarchy under
# /sys/fs/cgroup/cpuset/lxc.
sub rebalance_lxc_containers {

    return if !-d '/sys/fs/cgroup/cpuset/lxc'; # nothing to do...

    # the CPUs the lxc cgroup is allowed to use at all
    my $all_cpus = PVE::CpuSet->new_from_cgroup('lxc', 'effective_cpus');
    my @allowed_cpus = $all_cpus->members();
    my $cpucount = scalar(@allowed_cpus);
    my $max_cpuid = PVE::CpuSet::max_cpuid();

    # usage counter: how many containers currently sit on each CPU id
    my @cpu_ctcount = (0) x $max_cpuid;
    # containers eligible for rebalancing: [$vmid, $cores, $cpuset]
    my @balanced_cts;

    # Write $newset into the container's cgroups. The sequence matters:
    # first widen the outer cgroup to all CPUs so the inner /ns cgroup can
    # be set freely, then apply $newset to /ns, and only on success also
    # to the outer cgroup; on failure restore the original $cpuset.
    # Errors are only warned/logged on the first consecutive failure per
    # container (see $rebalance_error_count).
    my $modify_cpuset = sub {
	my ($vmid, $cpuset, $newset) = @_;

	if (!$rebalance_error_count->{$vmid}) {
	    syslog('info', "modified cpu set for lxc/$vmid: " .
		   $newset->short_string());
	}

	eval {
	    # allow all, so that we can set new cpuset in /ns
	    $all_cpus->write_to_cgroup("lxc/$vmid");
	    eval {
		$newset->write_to_cgroup("lxc/$vmid/ns");
	    };
	    if (my $err = $@) {
		warn $err if !$rebalance_error_count->{$vmid}++;
		# restore original
		$cpuset->write_to_cgroup("lxc/$vmid");
	    } else {
		# also apply to container root cgroup
		$newset->write_to_cgroup("lxc/$vmid");
		$rebalance_error_count->{$vmid} = 0;
	    }
	};
	if (my $err = $@) {
	    warn $err if !$rebalance_error_count->{$vmid}++;
	}
    };

    my $ctlist = PVE::LXC::config_list();

    # First pass: enact hot-plugged 'cores' changes and count current
    # CPU usage across all containers.
    foreach my $vmid (sort keys %$ctlist) {
	next if ! -d "/sys/fs/cgroup/cpuset/lxc/$vmid"; # not running

	my ($conf, $cpuset);
	eval {

	    $conf = PVE::LXC::Config->load_config($vmid);

	    $cpuset = PVE::CpuSet->new_from_cgroup("lxc/$vmid");
	};
	if (my $err = $@) {
	    warn $err;
	    next;
	}

	my @cpuset_members = $cpuset->members();

	# skip containers where the admin pinned CPUs manually
	if (!PVE::LXC::Config->has_lxc_entry($conf, 'lxc.cgroup.cpuset.cpus')) {

	    my $cores = $conf->{cores} || $cpucount;
	    $cores = $cpucount if $cores > $cpucount;

	    # see if the number of cores was hot-reduced or
	    # hasn't been enacted at all yet
	    my $newset = PVE::CpuSet->new();
	    if ($cores < scalar(@cpuset_members)) {
		# shrink: keep the first $cores CPUs of the current set
		for (my $i = 0; $i < $cores; $i++) {
		    $newset->insert($cpuset_members[$i]);
		}
	    } elsif ($cores > scalar(@cpuset_members)) {
		# grow: keep current CPUs and add allowed ones until
		# $cores is reached
		my $count = $newset->insert(@cpuset_members);
		foreach my $cpu (@allowed_cpus) {
		    $count += $newset->insert($cpu);
		    last if $count >= $cores;
		}
	    } else {
		$newset->insert(@cpuset_members);
	    }

	    # Apply hot-plugged changes if any:
	    if (!$newset->is_equal($cpuset)) {
		@cpuset_members = $newset->members();
		$modify_cpuset->($vmid, $cpuset, $newset);
	    }

	    # Note: no need to rebalance if we already use all cores
	    push @balanced_cts, [$vmid, $cores, $newset]
		if defined($conf->{cores}) && ($cores != $cpucount);
	}

	foreach my $cpu (@cpuset_members) {
	    $cpu_ctcount[$cpu]++ if $cpu <= $max_cpuid;
	}
    }

    # Pick a cheaper CPU from $cpulist for a container currently on $cpu.
    # Only moves if the candidate is at least 2 containers cheaper, to
    # avoid oscillating moves between similarly loaded CPUs.
    my $find_best_cpu = sub {
	my ($cpulist, $cpu) = @_;

	my $cur_cost = $cpu_ctcount[$cpu];
	my $cur_cpu = $cpu;

	foreach my $candidate (@$cpulist) {
	    my $cost = $cpu_ctcount[$candidate];
	    if ($cost < ($cur_cost -1)) {
		$cur_cost = $cost;
		$cur_cpu = $candidate;
	    }
	}

	return $cur_cpu;
    };

    # Second pass: for each eligible container, try to move its CPUs to
    # less crowded ones and apply the result if anything changed.
    foreach my $bct (@balanced_cts) {
	my ($vmid, $cores, $cpuset) = @$bct;

	my $newset = PVE::CpuSet->new();

	# candidate CPUs = allowed CPUs the container does not already use
	my $rest = [];
	foreach my $cpu (@allowed_cpus) {
	    next if $cpuset->has($cpu);
	    push @$rest, $cpu;
	}

	my @members = $cpuset->members();
	foreach my $cpu (@members) {
	    my $best = &$find_best_cpu($rest, $cpu);
	    if ($best != $cpu) {
		# update the global usage counters to reflect the move
		$cpu_ctcount[$best]++;
		$cpu_ctcount[$cpu]--;
	    }
	    $newset->insert($best);
	}

	if (!$newset->is_equal($cpuset)) {
	    $modify_cpuset->($vmid, $cpuset, $newset);
	}
    }
}
368
efd04666
DM
# Gather the status of all LXC containers, broadcast one RRD record per
# container and feed every enabled external metric plugin.
#
# $status_cfg - parsed status.cfg (plugin instances under ->{ids})
sub update_lxc_status {
    my ($status_cfg) = @_;

    my $ctime = time();
    my $vmstatus = PVE::LXC::vmstatus();

    for my $vmid (keys %$vmstatus) {
	my $d = $vmstatus->{$vmid};
	my $template = $d->{template} ? $d->{template} : "0";

	# running containers get full runtime counters; stopped ones
	# only report static values with the counter fields left empty
	my $data;
	if ($d->{status} eq 'running') { # running
	    $data = "$d->{uptime}:$d->{name}:$d->{status}:$template:"
		. "$ctime:$d->{cpus}:$d->{cpu}:"
		. "$d->{maxmem}:$d->{mem}:"
		. "$d->{maxdisk}:$d->{disk}:"
		. "$d->{netin}:$d->{netout}:"
		. "$d->{diskread}:$d->{diskwrite}";
	} else {
	    $data = "0:$d->{name}:$d->{status}:$template:$ctime:$d->{cpus}::"
		. "$d->{maxmem}::"
		. "$d->{maxdisk}:$d->{disk}:"
		. ":::";
	}
	PVE::Cluster::broadcast_rrd("pve2.3-vm/$vmid", $data);

	for my $id (keys %{$status_cfg->{ids}}) {
	    my $plugin_config = $status_cfg->{ids}->{$id};
	    next if $plugin_config->{disable};
	    PVE::Status::Plugin->lookup($plugin_config->{type})
		->update_lxc_status($plugin_config, $vmid, $d, $ctime);
	}
    }
}
403
# Gather usage information for all active storages, broadcast one RRD
# record per storage and feed every enabled external metric plugin.
#
# $status_cfg - parsed status.cfg (plugin instances under ->{ids})
sub update_storage_status {
    my ($status_cfg) = @_;

    my $cfg = PVE::Storage::config();
    my $ctime = time();
    my $info = PVE::Storage::storage_info($cfg);

    for my $storeid (keys %$info) {
	my $d = $info->{$storeid};
	next if !$d->{active}; # skip inactive/unavailable storages

	PVE::Cluster::broadcast_rrd(
	    "pve2-storage/${nodename}/$storeid",
	    "$ctime:$d->{total}:$d->{used}");

	for my $id (keys %{$status_cfg->{ids}}) {
	    my $plugin_config = $status_cfg->{ids}->{$id};
	    next if $plugin_config->{disable};
	    PVE::Status::Plugin->lookup($plugin_config->{type})
		->update_storage_status($plugin_config, $nodename, $storeid, $d, $ctime);
	}
    }
}
430
# Run one full status-update round: broadcast the task list, then the
# node, VM, container, cpuset-rebalance, storage and console-cleanup
# steps. Each step runs in its own eval so that one failing step cannot
# prevent the others from running; failures are logged via syslog.
sub update_status {

    # update worker list. This is not really required and
    # we just call this to make sure that we have a correct
    # list in case of an unexpected crash.
    eval {
	my $tlist = PVE::RPCEnvironment::active_workers();
	PVE::Cluster::broadcast_tasklist($tlist);
    };
    my $err = $@;
    syslog('err', $err) if $err;

    my $status_cfg = PVE::Cluster::cfs_read_file('status.cfg');

    # helper: run one update step, logging any error with the given
    # prefix (replaces six copy-pasted eval/syslog stanzas)
    my $run_step = sub {
	my ($errprefix, $code) = @_;
	eval { $code->(); };
	syslog('err', "$errprefix: $@") if $@;
    };

    $run_step->('node status update error', sub { update_node_status($status_cfg) });
    $run_step->('qemu status update error', sub { update_qemu_status($status_cfg) });
    $run_step->('lxc status update error', sub { update_lxc_status($status_cfg) });
    $run_step->('lxc cpuset rebalance error', sub { rebalance_lxc_containers() });
    $run_step->('storage status update error', sub { update_storage_status($status_cfg) });
    $run_step->('lxc console cleanup error', sub { remove_stale_lxc_consoles() });
}
481
my $next_update = 0;

# do not update directly after startup, because install scripts
# have a problem with that
my $cycle = 0;
my $updatetime = 10;

# resident-set-size baseline taken after the first cycles; used to
# detect memory growth and trigger a self-restart
my $initial_memory_usage;

# Main daemon loop: every $updatetime seconds refresh the cluster state
# and run a status-update round, restart ourselves if memory usage grows
# too much, and honor restart requests from SIGHUP.
sub run {
    my ($self) = @_;

    for (;;) { # forever

	$next_update = time() + $updatetime;

	# skip the very first iteration (see comment at $cycle above)
	if ($cycle) {
	    my ($start_sec, $start_usec) = gettimeofday();
	    eval {
		# syslog('info', "start status update");
		PVE::Cluster::cfs_update();
		update_status();
	    };
	    if (my $err = $@) {
		syslog('err', "status update error: $err");
	    }

	    my ($end_sec, $end_usec) = gettimeofday();
	    my $cptime = ($end_sec - $start_sec) + ($end_usec - $start_usec)/1000000;

	    # log rounds that take suspiciously long
	    syslog('info', sprintf("status update time (%.3f seconds)", $cptime))
		if ($cptime > 5);
	}

	$cycle++;

	my $mem = PVE::ProcFSTools::read_memory_usage();

	if (!defined($initial_memory_usage) || ($cycle < 10)) {
	    # still warming up - (re)take the baseline
	    $initial_memory_usage = $mem->{resident};
	} else {
	    my $diff = $mem->{resident} - $initial_memory_usage;
	    # restart ourselves if the resident set grew by more than 5 MiB
	    if ($diff > 5*1024*1024) {
		syslog('info', "restarting server after $cycle cycles to " .
		       "reduce memory usage (free $mem->{resident} ($diff) bytes)");
		$self->restart_daemon();
	    }
	}

	# sleep in 1s steps until the next round is due; the counter
	# protects against clock jumps, and a pending SIGHUP breaks out
	my $wcount = 0;
	while ((time() < $next_update)
	       && ($wcount < $updatetime) # protect against time wrap
	       && !$restart_request) {
	    $wcount++;
	    sleep(1);
	}

	$self->restart_daemon() if $restart_request;
    }
}
541
# register the standard daemon CLI sub-commands with the PVE::Daemon base
$daemon->register_start_command();
$daemon->register_restart_command(1);
$daemon->register_stop_command();
$daemon->register_status_command();

# CLI command dispatch table used by the pvestatd command-line wrapper;
# 'status' prints the daemon state followed by a newline
our $cmddef = {
    start => [ __PACKAGE__, 'start', []],
    restart => [ __PACKAGE__, 'restart', []],
    stop => [ __PACKAGE__, 'stop', []],
    status => [ __PACKAGE__, 'status', [], undef, sub { print shift . "\n";} ],
};

#my $cmd = shift;
#PVE::CLIHandler::handle_cmd($cmddef, $0, $cmd, \@ARGV, undef, $0);
#exit (0);

1;
559
560__END__
561
562=head1 NAME
563
564pvestatd - PVE Status Daemon
565
566=head1 SYNOPSIS
567
568=include synopsis
569
570=head1 DESCRIPTION
571
This daemon queries the status of VMs, storages and containers at
573regular intervals. The result is sent to all nodes in the cluster.
574
575=include pve_copyright
576
577
578
579
580