]>
Commit | Line | Data |
---|---|---|
efd04666 DM |
1 | package PVE::Service::pvestatd; |
2 | ||
3 | use strict; | |
4 | use warnings; | |
5 | ||
6 | use PVE::SafeSyslog; | |
7 | use PVE::Daemon; | |
8 | ||
fea39196 DC |
9 | use JSON; |
10 | ||
efd04666 DM |
11 | use Time::HiRes qw (gettimeofday); |
12 | use PVE::Tools qw(dir_glob_foreach file_read_firstline); | |
13 | use PVE::ProcFSTools; | |
41db757b | 14 | use PVE::CpuSet; |
efd04666 DM |
15 | use Filesys::Df; |
16 | use PVE::INotify; | |
0fcced16 | 17 | use PVE::Network; |
efd04666 DM |
18 | use PVE::Cluster qw(cfs_read_file); |
19 | use PVE::Storage; | |
20 | use PVE::QemuServer; | |
7a108020 | 21 | use PVE::QemuServer::Monitor; |
efd04666 | 22 | use PVE::LXC; |
ce251651 | 23 | use PVE::CGroup; |
41db757b | 24 | use PVE::LXC::Config; |
efd04666 DM |
25 | use PVE::RPCEnvironment; |
26 | use PVE::API2::Subscription; | |
27 | use PVE::AutoBalloon; | |
5ea29d13 | 28 | use PVE::AccessControl; |
fea39196 DC |
29 | use PVE::Ceph::Services; |
30 | use PVE::Ceph::Tools; | |
a6a681b9 | 31 | use PVE::pvecfg; |
efd04666 | 32 | |
f1f4bfef | 33 | use PVE::ExtMetric; |
efd04666 | 34 | use PVE::Status::Plugin; |
efd04666 DM |
35 | |
36 | use base qw(PVE::Daemon); | |
37 | ||
a36565ba AD |
# SDN support is optional: try to load PVE::Network::SDN and remember whether
# that worked. $have_sdn stays undefined when the module cannot be loaded.
my $have_sdn = eval {
    require PVE::Network::SDN;
    1;
};
43 | ||
efd04666 DM |
# Debug flag, copied from the daemon object in init().
my $opt_debug;
# Set by hup(); polled by the main loop in run().
my $restart_request;

# Name of the local node, used in broadcast keys and metric updates.
my $nodename = PVE::INotify::nodename();

# Program name plus arguments, handed to the daemon constructor below.
my $cmdline = [$0, @ARGV];

my %daemon_options = (
    restart_on_error => 5,
    stop_wait_time => 5,
);
my $daemon = __PACKAGE__->new('pvestatd', $cmdline, %daemon_options);
53 | ||
# init() hook of the daemon object: remember the debug flag of the daemon and
# call PVE::Cluster::cfs_update() once.
sub init {
    my $self = shift;

    $opt_debug = $self->{debug};

    PVE::Cluster::cfs_update();
}
61 | ||
# shutdown() hook of the daemon object: log the shutdown, collect terminated
# child processes and leave via exit_daemon(0).
sub shutdown {
    my $self = shift;

    syslog('info' , "server closing");

    # wait for children: collect every child that has already terminated,
    # WNOHANG keeps waitpid from blocking
    while (waitpid(-1, POSIX::WNOHANG()) > 0) {
        # nothing to do here, the loop condition does the work
    }

    $self->exit_daemon(0);
}
72 | ||
# hup() hook of the daemon object: only flag the restart request, the main
# loop in run() acts on it.
sub hup {
    my $self = shift;

    $restart_request = 1;
}
78 | ||
00b58c8c SR |
# KVM version for which CPU flags were last detected successfully
# ('' means: not yet).
my $cached_kvm_version = '';
# After a failed detection: time before which no new attempt is made.
my $next_flag_update_time;
my $failed_flag_update_delay_sec = 120;

# Query the supported CPU flags and broadcast them as the node KV entries
# "cpuflags-tcg" and "cpuflags-kvm". An empty string is broadcast for an
# accelerator without flags, and for both when the detection failed.
sub update_supported_cpuflags {
    my $kvm_version = PVE::QemuServer::kvm_user_version();

    # only update when QEMU/KVM version has changed, as that is the only reason
    # why flags could change without restarting pvestatd
    if ($cached_kvm_version && $cached_kvm_version eq $kvm_version) {
        return;
    }

    # still inside the retry delay of a failed detection
    return if $next_flag_update_time && $next_flag_update_time > time();
    $next_flag_update_time = 0;

    my $supported_cpuflags = eval { PVE::QemuServer::query_supported_cpu_flags() };
    warn $@ if $@;

    my $detected = $supported_cpuflags
        && ($supported_cpuflags->{tcg} || $supported_cpuflags->{kvm});

    if ($detected) {
        # only set cached version if there's actually something to broadcast
        $cached_kvm_version = $kvm_version;
    } else {
        # something went wrong, clear broadcast flags and set try-again delay
        warn "CPU flag detection failed, will try again after delay\n";
        $next_flag_update_time = time() + $failed_flag_update_delay_sec;

        $supported_cpuflags = {};
    }

    for my $accel ("tcg", "kvm") {
        my $flags = $supported_cpuflags->{$accel};
        # the empty string clears potentially invalid data
        my $value = $flags ? join(' ', @$flags) : '';
        PVE::Cluster::broadcast_node_kv("cpuflags-$accel", $value);
    }
}
119 | ||
50786956 DM |
# Build an RRD update string from an array reference: the values are joined
# with ':' and every undefined value is written as 'U'.
my $generate_rrd_string = sub {
    my ($data) = @_;

    my @fields = map { defined($_) ? $_ : 'U' } @$data;

    return join(':', @fields);
};
125 | ||
efd04666 DM |
# Collect the statistics of the local node, broadcast them as the
# "pve2-node/<nodename>" RRD record and pass them to PVE::ExtMetric.
#
# $status_cfg is handed unchanged to PVE::ExtMetric::transactions_start().
sub update_node_status {
    my ($status_cfg) = @_;

    my ($uptime) = PVE::ProcFSTools::read_proc_uptime();

    my ($avg1, $avg5, $avg15) = PVE::ProcFSTools::read_loadavg();
    my $cpustat = PVE::ProcFSTools::read_proc_stat();
    my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();
    my $maxcpu = $cpuinfo->{cpus};

    update_supported_cpuflags();

    my $subinfo = PVE::INotify::read_file('subscription');
    my $sublevel = $subinfo->{level} || '';

    my $netdev = PVE::ProcFSTools::read_proc_net_dev();
    # traffic from/to physical interface cards
    my $netin = 0;
    my $netout = 0;
    for my $dev (keys %$netdev) {
        next if $dev !~ /^$PVE::Network::PHYSICAL_NIC_RE$/;
        $netin += $netdev->{$dev}->{receive};
        $netout += $netdev->{$dev}->{transmit};
    }

    my $meminfo = PVE::ProcFSTools::read_meminfo();

    my $rootfs = df('/', 1); # output is bytes
    # everything not free is considered to be used
    my $dused = $rootfs->{blocks} - $rootfs->{bfree};

    my $ctime = time();

    my @rrd_fields = (
        $uptime, $sublevel, $ctime, $avg1, $maxcpu,
        $cpustat->{cpu}, $cpustat->{wait},
        $meminfo->{memtotal}, $meminfo->{memused},
        $meminfo->{swaptotal}, $meminfo->{swapused},
        $rootfs->{blocks}, $dused, $netin, $netout,
    );
    my $data = $generate_rrd_string->(\@rrd_fields);
    PVE::Cluster::broadcast_rrd("pve2-node/$nodename", $data);

    my $node_metric = {
        uptime => $uptime,
        cpustat => $cpustat,
        memory => $meminfo,
        blockstat => $rootfs,
        nics => $netdev,
    };
    # the load averages and the cpu count are reported as part of cpustat
    @{$node_metric->{cpustat}}{qw(avg1 avg5 avg15)} = ($avg1, $avg5, $avg15);
    $node_metric->{cpustat}->{cpus} = $maxcpu;

    my $transactions = PVE::ExtMetric::transactions_start($status_cfg);
    PVE::ExtMetric::update_all($transactions, 'node', $nodename, $node_metric, $ctime);
    PVE::ExtMetric::transactions_finish($transactions);
}
179 | ||
# Compute new balloon targets with PVE::AutoBalloon::compute_alg1() and send
# a "balloon" monitor command to every VM whose target differs from its
# current balloon value.
#
# $vmstatus is the VM status hash, keyed by VMID; its {balloon} entries are
# read here.
sub auto_balloning {
    my ($vmstatus) = @_;

    # printf-style debug output, silent unless the debug flag is set
    my $log = sub {
        return if !$opt_debug;
        printf @_;
    };

    my $hostmeminfo = PVE::ProcFSTools::read_meminfo();
    # NOTE: to debug, run 'pvestatd -d' and set memtotal here
    #$hostmeminfo->{memtotal} = int(2*1024*1024*1024/0.8); # you can set this to test
    my $memtotal = $hostmeminfo->{memtotal};
    my $memused = $hostmeminfo->{memused};
    my $hostfreemem = $memtotal - $memused;

    # try to use ~80% host memory; goal is the change amount required to achieve that
    my $goal = int($memtotal * 0.8 - $memused);
    $log->("host goal: $goal free: $hostfreemem total: $memtotal\n");

    my $maxchange = 100*1024*1024;
    my $res = PVE::AutoBalloon::compute_alg1($vmstatus, $goal, $maxchange);

    for my $vmid (sort keys %$res) {
        my $target = int($res->{$vmid});
        my $current = int($vmstatus->{$vmid}->{balloon});
        next if $target == $current; # no need to change

        my $change = $target - $current;
        $log->("BALLOON $vmid to $target (%d)\n", $change);

        # int() again: pass a fresh numeric value, $target was interpolated
        # into a string above
        eval { PVE::QemuServer::Monitor::mon_cmd($vmid, "balloon", value => int($target)) };
        warn $@ if $@;
    }
}
207 | ||
# Run the auto ballooning, then broadcast one "pve2.3-vm/<vmid>" RRD record
# per VM and pass the VM status to PVE::ExtMetric.
#
# $status_cfg is handed unchanged to PVE::ExtMetric::transactions_start().
sub update_qemu_status {
    my ($status_cfg) = @_;

    my $ctime = time();
    my $vmstatus = PVE::QemuServer::vmstatus(undef, 1);

    eval { auto_balloning($vmstatus); };
    syslog('err', "auto ballooning error: $@") if $@;

    my $transactions = PVE::ExtMetric::transactions_start($status_cfg);

    for my $vmid (keys %$vmstatus) {
        my $d = $vmstatus->{$vmid};
        my $status = $d->{qmpstatus} || $d->{status} || 'stopped';
        my $template = $d->{template} || "0";

        my $fields;
        if ($d->{pid}) { # running
            $fields = [
                @$d{qw(uptime name)}, $status, $template, $ctime,
                @$d{qw(cpus cpu maxmem mem maxdisk disk)},
                @$d{qw(netin netout diskread diskwrite)},
            ];
        } else {
            # not running: uptime is 0 and the usage counters are undefined
            $fields = [
                0, $d->{name}, $status, $template, $ctime,
                $d->{cpus}, undef, $d->{maxmem}, undef,
                @$d{qw(maxdisk disk)},
                (undef) x 4,
            ];
        }
        my $data = $generate_rrd_string->($fields);
        PVE::Cluster::broadcast_rrd("pve2.3-vm/$vmid", $data);

        PVE::ExtMetric::update_all($transactions, 'qemu', $vmid, $d, $ctime, $nodename);
    }

    PVE::ExtMetric::transactions_finish($transactions);
}
240 | ||
# Send signal 9 to every lxc-console process that belongs to a VMID without
# an entry in the LXC status list.
sub remove_stale_lxc_consoles {
    my $vmstatus = PVE::LXC::vmstatus();
    my $pidhash = PVE::LXC::find_lxc_console_pids();

    for my $vmid (keys %$pidhash) {
        # the container is still known, leave its consoles alone
        next if defined($vmstatus->{$vmid});

        syslog('info', "remove stale lxc-console for CT $vmid");
        kill(9, $_) for @{$pidhash->{$vmid}};
    }
}
254 | ||
b3f1adb2 DM |
# Per-VMID counter of failed cpuset updates since the last successful one.
# Used to log and warn only for the first failure of a series.
my $rebalance_error_count = {};

# Set once the cpuset controller path could not be determined; all later
# calls of rebalance_lxc_containers() then return immediately.
my $NO_REBALANCE;

# Adapt the cpusets of the containers to their configured number of cores and
# spread containers with a core limit over the less used CPUs.
#
# Only containers with a "<cpuset base>/lxc/<vmid>/ns" directory are handled.
# Containers with an explicit 'lxc.cgroup.cpuset.cpus' or
# 'lxc.cgroup2.cpuset.cpus' entry are never modified, but the CPUs they use
# are still counted.
sub rebalance_lxc_containers {
    return if $NO_REBALANCE;

    # Make sure we can find the cpuset controller path:
    my $cpuset_base = eval { PVE::CGroup::cpuset_controller_path() };
    if (my $err = $@) {
        syslog('info', "could not get cpuset controller path: $err");
    }

    if (!defined($cpuset_base)) {
        $NO_REBALANCE = 1;
        return;
    }

    # Figure out the cpu count & highest ID
    my $all_cpus = PVE::CpuSet->new_from_path($cpuset_base, 1);
    my @allowed_cpus = $all_cpus->members();
    my $cpucount = scalar(@allowed_cpus);
    my $max_cpuid = $allowed_cpus[-1];

    # number of containers using each CPU, indexed by CPU ID
    my @cpu_ctcount = (0) x ($max_cpuid+1);
    # [vmid, cores, cpuset] of the containers that take part in rebalancing
    my @balanced_cts;

    # A mapping { vmid => cgroup_payload_path } for containers where namespace
    # separation is active and recognized.
    my $ctinfo = {};

    # Replace the cpuset of container $vmid ($cpuset) with $newset. Errors are
    # counted in $rebalance_error_count and only the first one is warned about.
    my $modify_cpuset = sub {
        my ($vmid, $cpuset, $newset) = @_;

        if (!$rebalance_error_count->{$vmid}) {
            syslog('info', "modified cpu set for lxc/$vmid: " . $newset->short_string());
        }

        eval {
            my $cgbase = $ctinfo->{$vmid};

            # Only containers registered in $ctinfo get here. Fail with a
            # clear message instead of writing to an undefined path.
            die "no cpuset cgroup path known for CT $vmid\n" if !defined($cgbase);

            # allow all, so that we can set new cpuset in /ns
            $all_cpus->write_to_path($cgbase);
            eval {
                $newset->write_to_path("$cgbase/ns");
            };
            if (my $err = $@) {
                warn $err if !$rebalance_error_count->{$vmid}++;
                # restore original
                $cpuset->write_to_path($cgbase);
            } else {
                # also apply to container root cgroup
                $newset->write_to_path($cgbase);
                $rebalance_error_count->{$vmid} = 0;
            }
        };
        if (my $err = $@) {
            warn $err if !$rebalance_error_count->{$vmid}++;
        }
    };

    my $ctlist = PVE::LXC::config_list();

    foreach my $vmid (sort keys %$ctlist) {
        my $cgpath = "$cpuset_base/lxc/$vmid";
        next if !(-d "$cgpath/ns"); # old style container
        $ctinfo->{$vmid} = $cgpath;

        my ($conf, $cpuset) = eval {(
            PVE::LXC::Config->load_config($vmid),
            PVE::CpuSet->new_from_path($cgpath),
        )};
        if (my $err = $@) {
            warn $err;
            next;
        }

        my @cpuset_members = $cpuset->members();

        if (!PVE::LXC::Config->has_lxc_entry($conf, 'lxc.cgroup.cpuset.cpus')
            && !PVE::LXC::Config->has_lxc_entry($conf, 'lxc.cgroup2.cpuset.cpus')
        ) {
            my $cores = $conf->{cores} || $cpucount;
            $cores = $cpucount if $cores > $cpucount;

            # see if the number of cores was hot-reduced or hasn't been enacted at all yet
            my $newset = PVE::CpuSet->new();
            if ($cores < scalar(@cpuset_members)) {
                # too many CPUs in use: keep only the first $cores members
                for (my $i = 0; $i < $cores; $i++) {
                    $newset->insert($cpuset_members[$i]);
                }
            } elsif ($cores > scalar(@cpuset_members)) {
                # too few CPUs in use: add allowed CPUs until $cores is reached
                my $count = $newset->insert(@cpuset_members);
                foreach my $cpu (@allowed_cpus) {
                    $count += $newset->insert($cpu);
                    last if $count >= $cores;
                }
            } else {
                $newset->insert(@cpuset_members);
            }

            # Apply hot-plugged changes if any:
            if (!$newset->is_equal($cpuset)) {
                @cpuset_members = $newset->members();
                $modify_cpuset->($vmid, $cpuset, $newset);
            }

            # Note: no need to rebalance if we already use all cores
            push @balanced_cts, [$vmid, $cores, $newset]
                if defined($conf->{cores}) && ($cores != $cpucount);
        }

        foreach my $cpu (@cpuset_members) {
            $cpu_ctcount[$cpu]++ if $cpu <= $max_cpuid;
        }
    }

    # Return the CPU from $cpulist that is used by the fewest containers, but
    # only if that saves more than one container compared to $cpu; otherwise
    # return $cpu itself.
    my $find_best_cpu = sub {
        my ($cpulist, $cpu) = @_;

        my $cur_cost = $cpu_ctcount[$cpu];
        my $cur_cpu = $cpu;

        foreach my $candidate (@$cpulist) {
            my $cost = $cpu_ctcount[$candidate];
            if ($cost < ($cur_cost - 1)) {
                $cur_cost = $cost;
                $cur_cpu = $candidate;
            }
        }

        return $cur_cpu;
    };

    foreach my $bct (@balanced_cts) {
        my ($vmid, undef, $cpuset) = @$bct;

        # allowed CPUs that this container does not use yet
        my $rest = [ grep { !$cpuset->has($_) } @allowed_cpus ];

        my $newset = PVE::CpuSet->new();
        for my $cpu ($cpuset->members()) {
            my $best = $find_best_cpu->($rest, $cpu);
            if ($best != $cpu) {
                $cpu_ctcount[$best]++;
                $cpu_ctcount[$cpu]--;
            }
            $newset->insert($best);
        }

        if (!$newset->is_equal($cpuset)) {
            $modify_cpuset->($vmid, $cpuset, $newset);
        }
    }
}
416 | ||
efd04666 DM |
# Broadcast one "pve2.3-vm/<vmid>" RRD record per container and pass the
# container status to PVE::ExtMetric.
#
# $status_cfg is handed unchanged to PVE::ExtMetric::transactions_start().
sub update_lxc_status {
    my ($status_cfg) = @_;

    my $ctime = time();
    my $vmstatus = PVE::LXC::vmstatus();

    my $transactions = PVE::ExtMetric::transactions_start($status_cfg);

    for my $vmid (keys %$vmstatus) {
        my $d = $vmstatus->{$vmid};
        my $template = $d->{template} || "0";

        my $fields;
        if ($d->{status} eq 'running') { # running
            $fields = [
                @$d{qw(uptime name status)}, $template, $ctime,
                @$d{qw(cpus cpu maxmem mem maxdisk disk)},
                @$d{qw(netin netout diskread diskwrite)},
            ];
        } else {
            # not running: uptime is 0 and the usage counters are undefined
            $fields = [
                0, @$d{qw(name status)}, $template, $ctime,
                $d->{cpus}, undef, $d->{maxmem}, undef,
                @$d{qw(maxdisk disk)},
                (undef) x 4,
            ];
        }
        my $data = $generate_rrd_string->($fields);
        PVE::Cluster::broadcast_rrd("pve2.3-vm/$vmid", $data);

        PVE::ExtMetric::update_all($transactions, 'lxc', $vmid, $d, $ctime, $nodename);
    }

    PVE::ExtMetric::transactions_finish($transactions);
}
448 | ||
# Broadcast one "pve2-storage/<nodename>/<storeid>" RRD record per active
# storage and pass the storage status to PVE::ExtMetric. Storages without a
# true {active} entry are skipped.
#
# $status_cfg is handed unchanged to PVE::ExtMetric::transactions_start().
sub update_storage_status {
    my ($status_cfg) = @_;

    my $cfg = PVE::Storage::config();
    my $ctime = time();
    my $info = PVE::Storage::storage_info($cfg);

    my $transactions = PVE::ExtMetric::transactions_start($status_cfg);

    for my $storeid (keys %$info) {
        my $d = $info->{$storeid};
        next unless $d->{active};

        my @fields = ($ctime, $d->{total}, $d->{used});
        my $data = $generate_rrd_string->(\@fields);

        PVE::Cluster::broadcast_rrd("pve2-storage/${nodename}/$storeid", $data);

        PVE::ExtMetric::update_all($transactions, 'storage', $nodename, $storeid, $d, $ctime);
    }

    PVE::ExtMetric::transactions_finish($transactions);
}
471 | ||
5ea29d13 FG |
# Call PVE::AccessControl::rotate_authkey() unless
# PVE::AccessControl::check_authkey(1) returns a true value.
sub rotate_authkeys {
    return if PVE::AccessControl::check_authkey(1);

    PVE::AccessControl::rotate_authkey();
}
475 | ||
a6dff455 TL |
# Broadcast the Ceph services and versions. Nothing is done when
# PVE::Ceph::Tools::check_ceph_inited(1) returns a false value.
sub update_ceph_metadata {
    if (PVE::Ceph::Tools::check_ceph_inited(1)) {
        PVE::Ceph::Services::broadcast_ceph_services();

        PVE::Ceph::Services::broadcast_ceph_versions();
    }
}
483 | ||
a36565ba AD |
# Broadcast the first value returned by PVE::Network::SDN::status() as the
# JSON encoded node KV entry "sdn"; an undefined value is broadcast when that
# value is false. Does nothing when the SDN module could not be loaded.
sub update_sdn_status {
    return if !$have_sdn;

    my ($transport_status) = PVE::Network::SDN::status();

    my $status;
    $status = encode_json($transport_status) if $transport_status;

    PVE::Cluster::broadcast_node_kv("sdn", $status);
}
493 | ||
a6a681b9 WB |
# Set to 1 after the version info was broadcast successfully.
my $broadcast_version_info_done = 0;

# Broadcast the JSON encoded PVE::pvecfg::version_info() as the node KV entry
# 'version-info'. Later calls are no-ops once a broadcast went through.
my sub broadcast_version_info : prototype() {
    return if $broadcast_version_info_done;

    my $version_info = encode_json(PVE::pvecfg::version_info());
    PVE::Cluster::broadcast_node_kv('version-info', $version_info);

    $broadcast_version_info_done = 1;
}
504 | ||
efd04666 DM |
# Run all periodic update steps in a fixed order. Every step runs in its own
# eval, so a failing step is logged to syslog and the remaining steps are
# still executed.
sub update_status {

    # update worker list. This is not really required and
    # we just call this to make sure that we have a correct
    # list in case of an unexpected crash.
    my $rpcenv = PVE::RPCEnvironment::get();

    eval {
        my $tlist = $rpcenv->active_workers();
        PVE::Cluster::broadcast_tasklist($tlist);
    };
    if (my $err = $@) {
        syslog('err', $err);
    }

    my $status_cfg = PVE::Cluster::cfs_read_file('status.cfg');

    # [ prefix of the error message, code to run ], in execution order
    my @steps = (
        ['node status update error', sub { update_node_status($status_cfg) }],
        ['qemu status update error', sub { update_qemu_status($status_cfg) }],
        ['lxc status update error', sub { update_lxc_status($status_cfg) }],
        ['lxc cpuset rebalance error', sub { rebalance_lxc_containers() }],
        ['storage status update error', sub { update_storage_status($status_cfg) }],
        ['lxc console cleanup error', sub { remove_stale_lxc_consoles() }],
        ['authkey rotation error', sub { rotate_authkeys() }],
        ['ceph metadata update error', sub { update_ceph_metadata() }],
        ['sdn status update error', sub { update_sdn_status() }],
        ['version info update error', sub { broadcast_version_info() }],
    );

    for my $step (@steps) {
        my ($errprefix, $code) = @$step;

        eval {
            $code->();
        };
        if (my $err = $@) {
            syslog('err', "$errprefix: $err");
        }
    }
}
581 | ||
# Time at which the next update cycle is due.
my $next_update = 0;

# do not update directly after startup, because install scripts
# have a problem with that
my $cycle = 0;
# Seconds between two update cycles.
my $updatetime = 10;

# Resident memory baseline, see the memory check in run().
my $initial_memory_usage;

# Main loop of the daemon; it is only left through restart_daemon().
#
# Each iteration runs one status update (skipped in the very first
# iteration), checks the memory usage of the process and then sleeps in one
# second steps until the next update is due or a restart was requested.
sub run {
    my ($self) = @_;

    while (1) { # forever

        $next_update = time() + $updatetime;

        if ($cycle) {
            my ($start_sec, $start_usec) = gettimeofday ();
            eval {
                # syslog('info', "start status update");
                PVE::Cluster::cfs_update();
                update_status();
            };
            if (my $err = $@) {
                syslog('err', "status update error: $err");
            }

            my ($end_sec, $end_usec) = gettimeofday ();
            my $cptime = ($end_sec - $start_sec) + ($end_usec - $start_usec)/1000000;

            # only mention updates that took unusually long
            if ($cptime > 5) {
                syslog('info', sprintf("status update time (%.3f seconds)", $cptime));
            }
        }

        $cycle++;

        my $mem = PVE::ProcFSTools::read_memory_usage();
        my $resident_kb = $mem->{resident} / 1024;

        # the baseline keeps following the current usage while $cycle is
        # below 10
        if (!defined($initial_memory_usage) || ($cycle < 10)) {
            $initial_memory_usage = $resident_kb;
        } else {
            # restart once the usage grew by more than 15 * 1024 over the
            # baseline (KB, going by the variable name and the log message)
            my $diff = $resident_kb - $initial_memory_usage;
            if ($diff > 15 * 1024) {
                syslog ('info', "restarting server after $cycle cycles to " .
                    "reduce memory usage (free $resident_kb ($diff) KB)");
                $self->restart_daemon();
            }
        }

        my $wcount = 0;
        while (time() < $next_update
            && $wcount < $updatetime # protect against time wrap
            && !$restart_request
        ) {
            $wcount++;
            sleep (1);
        }

        $self->restart_daemon() if $restart_request;
    }
}
642 | ||
# Register the start, restart, stop and status commands on the daemon object.
$daemon->register_start_command();
$daemon->register_restart_command(1);
$daemon->register_stop_command();
$daemon->register_status_command();

# Command table: each entry starts with the package, the method name and an
# array reference (empty for all commands here). The 'status' entry
# additionally carries a callback that prints its first argument followed by
# a newline.
our $cmddef = {
    start => [ __PACKAGE__, 'start', []],
    restart => [ __PACKAGE__, 'restart', []],
    stop => [ __PACKAGE__, 'stop', []],
    status => [
        __PACKAGE__, 'status', [], undef,
        sub {
            my ($res) = @_;
            print $res . "\n";
        },
    ],
};
654 | ||
efd04666 DM |
655 | 1; |
656 | ||
efd04666 DM |
657 | |
658 | ||
659 | ||
660 |