]>
Commit | Line | Data |
---|---|---|
aff192e6 DM |
1 | #!/usr/bin/perl -w |
2 | ||
3 | use strict; | |
4 | use PVE::SafeSyslog; | |
5 | use POSIX ":sys_wait_h"; | |
6 | use Fcntl ':flock'; | |
7 | use Getopt::Long; | |
8 | use Time::HiRes qw (gettimeofday); | |
9 | use PVE::Tools; | |
10 | use PVE::ProcFSTools; | |
11 | use Filesys::Df; | |
12 | use PVE::INotify; | |
13 | use PVE::Cluster qw(cfs_read_file); | |
14 | use PVE::Storage; | |
15 | use PVE::QemuServer; | |
16 | use PVE::RPCEnvironment; | |
17 | ||
# Forward every Perl warning to syslog.  $@ is saved on entry and put
# back on exit so that the logging call cannot disturb an eval error
# that is still waiting to be inspected by the interrupted code.
$SIG{'__WARN__'} = sub {
    my $saved_eval_error = $@;
    my ($text) = @_;

    chomp $text;
    syslog('warning', "WARNING: %s", $text);

    $@ = $saved_eval_error;
};
25 | ||
# Initialise logging under the daemon's name.
initlog('pvestatd');

# Run with a fixed search path instead of the inherited one.
$ENV{'PATH'} = '/sbin:/bin:/usr/sbin:/usr/bin';

# $> is the effective uid; only uid 0 may continue.
$> == 0 or die "please run as root\n";

my $nodename = PVE::INotify::nodename();

# --debug is the only command line option that is recognised.
my $opt_debug;

GetOptions('debug' => \$opt_debug)
    or die "USAGE: $0 [--debug]\n";

my $opt_pidfile = "/var/run/pvestatd.pid";
41 | ||
# Take an exclusive, non-blocking lock on "<pidfile>.lock".
#
# Parameters:
#   $pidfile - path of the pid file; ".lock" is appended to get the
#              name of the lock file (created if it does not exist).
#
# On failure to open or to lock the file the problem is logged to
# syslog at level 'err' and the sub dies with "ERROR: <message>".
#
# FLCK is intentionally a package-level handle: it is not closed on
# success, so the lock stays held after this sub has returned.
sub lockpidfile {
    my $pidfile = shift;
    my $lkfn = "$pidfile.lock";

    # three-argument open: the file name is never parsed as part of the mode
    if (!open(FLCK, '>>', $lkfn)) {
        my $msg = "can't aquire lock on file '$lkfn' - $!";
        syslog('err', "%s", $msg);
        die "ERROR: $msg\n";
    }

    if (!flock(FLCK, LOCK_EX|LOCK_NB)) {
        # build the message first: close() may overwrite $!
        my $msg = "can't aquire lock '$lkfn' - $!";
        close(FLCK);
        syslog('err', "%s", $msg);
        die "ERROR: $msg\n";
    }
}
59 | ||
# Write the current process id, followed by a newline, to $pidfile.
# An existing file is truncated.
#
# Parameters:
#   $pidfile - path of the pid file to (re)create.
#
# Returns 1 on success.  If the file cannot be opened or written, the
# problem is logged to syslog at level 'err' and the sub dies with
# "ERROR: <message>".
sub writepidfile {
    my $pidfile = shift;

    my $fh;
    if (!open($fh, '>', $pidfile)) {
        my $msg = "can't open pid file '$pidfile' - $!";
        syslog('err', "%s", $msg);
        die "ERROR: $msg\n";
    }

    # Buffered output may only fail when the handle is closed, so the
    # result of close() is checked as well as that of print().
    my $written = (print {$fh} "$$\n") && close($fh);
    if (!$written) {
        my $msg = "can't write pid file '$pidfile' - $!";
        syslog('err', "%s", $msg);
        die "ERROR: $msg\n";
    }

    return 1;
}
71 | ||
# try to get the lock
lockpidfile($opt_pidfile);

# run in background
my $spid;

# true when this process was started by restart_server(), which sets
# RESTART_PVESTATD before it re-executes the daemon
my $restart = $ENV{RESTART_PVESTATD};

if (!$opt_debug) {
    # 'or' is required here: with '||' the die would be attached to the
    # (always true) file name string and a failed open went unnoticed.
    open(STDIN, '<', '/dev/null') or die "can't read /dev/null";
    open(STDOUT, '>', '/dev/null') or die "can't write /dev/null";
}

# only a fresh, non-debug start detaches from the caller
if (!$restart && !$opt_debug) {
    $spid = fork();
    if (!defined ($spid)) {
        my $msg = "can't put server into background - fork failed";
        syslog('err', $msg);
        die "ERROR: $msg\n";
    } elsif ($spid) { #parent
        exit (0);
    }
}

# written after the fork so that the file holds the pid of the
# process that keeps running
writepidfile($opt_pidfile);

# send STDERR to wherever STDOUT points now
open(STDERR, '>&', \*STDOUT) or die "can't close STDERR\n";
99 | ||
# Remove the lock file first and the pid file second.  Failures are
# not reported; the result of the second unlink is the return value.
sub cleanup {
    unlink($opt_pidfile . '.lock');
    return unlink($opt_pidfile);
}
104 | ||
# One handler shared by INT, TERM and QUIT: log the shutdown, collect
# child processes, remove pid/lock files and exit with status 0.
my $on_terminate = sub {
    syslog('info' , "server closing");

    # a further INT gets the default action from here on
    $SIG{INT} = 'DEFAULT';

    # collect children; WNOHANG keeps waitpid from blocking
    while (waitpid(-1, POSIX::WNOHANG()) > 0) {
        # nothing to do, the loop only reaps
    }

    cleanup();

    exit (0);
};

$SIG{$_} = $on_terminate for qw(QUIT TERM INT);
117 | ||
PVE::INotify::inotify_init();

# set to 1 by the HUP handler below
my $reload_config;

# $restart tells a self-restart apart from a fresh start
syslog('info' , $restart ? "restarting server" : "starting server");

# HUP only raises the flag; nothing else happens inside the handler
$SIG{HUP} = sub { $reload_config = 1; };
131 | ||
# Collect load, cpu, memory, swap, root file system and network
# counters of this node and broadcast them as one colon separated
# record under the key "pve2-node/<nodename>".
sub update_node_status {

    # only the first load average value is part of the record
    my ($avg1) = PVE::ProcFSTools::read_loadavg();

    my $stat = PVE::ProcFSTools::read_proc_stat();

    my $netdev = PVE::ProcFSTools::read_proc_net_dev();

    my ($uptime) = PVE::ProcFSTools::read_proc_uptime();

    my $maxcpu = PVE::ProcFSTools::read_cpuinfo()->{cpus};

    # traffic from/to physical interface cards (ethN only)
    my ($netin, $netout) = (0, 0);
    for my $dev (grep { m/^eth\d+$/ } keys %$netdev) {
        my $counters = $netdev->{$dev};
        $netin  += $counters->{receive};
        $netout += $counters->{transmit};
    }

    my $meminfo = PVE::ProcFSTools::read_meminfo();

    my $dinfo = df('/', 1); # output is bytes

    my $ctime = time();

    # everything not free is considered to be used
    my $dused = $dinfo->{blocks} - $dinfo->{bfree};

    my $data = join(':',
        $uptime, $ctime, $avg1, $maxcpu,
        $stat->{cpu}, $stat->{wait},
        $meminfo->{memtotal}, $meminfo->{memused},
        $meminfo->{swaptotal}, $meminfo->{swapused},
        $dinfo->{blocks}, $dused, $netin, $netout);

    PVE::Cluster::broadcast_rrd("pve2-node/$nodename", $data);
}
171 | ||
# Broadcast one colon separated record per VM under "pve2-vm/<vmid>".
# Both record variants have 13 fields in the same order; for a VM
# without a pid the uptime is 0 and the cpu, mem, netin, netout,
# diskread and diskwrite fields are left empty.
sub update_qemu_status {

    my $ctime = time();

    my $vmstatus = PVE::QemuServer::vmstatus();

    for my $vmid (keys %$vmstatus) {
        my $d = $vmstatus->{$vmid};

        my @fields;
        if ($d->{pid}) { # running
            @fields = (
                $d->{uptime}, $d->{name}, $ctime, $d->{cpus}, $d->{cpu},
                $d->{maxmem}, $d->{mem},
                $d->{maxdisk}, $d->{disk},
                $d->{netin}, $d->{netout},
                $d->{diskread}, $d->{diskwrite},
            );
        } else {
            @fields = (
                0, $d->{name}, $ctime, $d->{cpus}, '',
                $d->{maxmem}, '',
                $d->{maxdisk}, $d->{disk},
                '', '',
                '', '',
            );
        }

        PVE::Cluster::broadcast_rrd("pve2-vm/$vmid", join(':', @fields));
    }
}
196 | ||
# Broadcast "<time>:<total>:<used>" for every active storage listed by
# PVE::Storage::storage_info(), keyed by
# "pve2-storage/<nodename>/<storeid>".  Inactive storages are skipped.
sub update_storage_status {

    my $cfg = cfs_read_file("storage.cfg");

    my $ctime = time();

    my $info = PVE::Storage::storage_info($cfg);

    for my $storeid (keys %$info) {
        my $d = $info->{$storeid};

        if ($d->{active}) {
            # everything not free is considered to be used
            my $realused = $d->{total} - $d->{avail};

            PVE::Cluster::broadcast_rrd(
                "pve2-storage/${nodename}/$storeid",
                join(':', $ctime, $d->{total}, $realused));
        }
    }
}
218 | ||
# Run all status collectors once.  Each step is wrapped in its own
# eval so that a failure in one of them is logged at level 'err' and
# does not keep the remaining steps from running.
#
# Error text is always passed to syslog as an argument of a "%s"
# format, never as the format itself, so a '%' inside an error
# message is logged literally.
sub update_status {

    # update worker list. This is not really required and
    # we just call this to make sure that we have a correct
    # list in case of an unexpected crash.
    eval {
        my $tlist = PVE::RPCEnvironment::active_workers();
        PVE::Cluster::broadcast_tasklist($tlist);
    };
    my $err = $@;
    syslog('err', "%s", $err) if $err;

    # label used in the log message => collector to run
    my @collectors = (
        [ 'node',    \&update_node_status ],
        [ 'qemu',    \&update_qemu_status ],
        [ 'storage', \&update_storage_status ],
    );

    for my $collector (@collectors) {
        my ($label, $code) = @$collector;

        eval {
            $code->();
        };
        $err = $@;
        syslog('err', "%s status update error: %s", $label, $err) if $err;
    }
}
249 | ||
# time (epoch seconds) at which the next update is due
my $next_update = 0;

# do not update directly after startup, because install scripts
# have a problem with that
my $cycle = 0;

# seconds between two status updates
my $updatetime = 10;

# Command line used when the daemon re-executes itself.  GetOptions()
# has already removed the options it recognised from @ARGV, so --debug
# is added back explicitly; otherwise a restart would leave debug mode.
my $commandline = [$0, ($opt_debug ? ('--debug') : ()), @ARGV];

# name shown in the process list
$0 = "pvestatd";
260 | ||
# Replace the running daemon with a fresh copy of itself.
#
# Parameters:
#   $waittime - optional number of seconds to sleep before the exec.
#
# RESTART_PVESTATD is set in the environment so that the new process
# can tell that it was started by a restart.  This sub does not
# return: either exec succeeds, or the failure is logged at level
# 'err' and the process exits with status -1.
sub restart_server {
    my $waittime = shift;

    syslog('info', "server shutdown (restart)");

    $ENV{RESTART_PVESTATD} = 1;

    sleep($waittime) if $waittime; # avoid high server load due to restarts

    # The block form always executes the program directly, even when
    # the command line has only one element, so no shell is involved.
    # exec returns only when it failed.
    exec { $commandline->[0] } @$commandline
        or syslog('err', "restart failed - can't exec '%s': %s",
                  $commandline->[0], "$!");

    exit (-1);
}
273 | ||
# Main loop: one status update per $updatetime seconds.  The very
# first pass ($cycle == 0) skips the update.  Any error that escapes
# the outer eval is logged and answered with a delayed restart.
for (;;) { # forever

    eval {
        $next_update = time() + $updatetime;

        if ($cycle) {
            my ($ccsec, $cusec) = gettimeofday ();
            eval {
                $reload_config = 0;
                syslog('info', "start status update");
                PVE::Cluster::cfs_update();
                update_status();
            };
            my $err = $@;

            if ($err) {
                # error text goes in as an argument, not as the format
                syslog('err', "status update error: %s", $err);
            }

            my ($ccsec_end, $cusec_end) = gettimeofday ();
            # elapsed wall clock time in seconds (usec part is 1e-6 s)
            my $cptime = ($ccsec_end-$ccsec) + ($cusec_end - $cusec)/1000000;

            syslog('info', sprintf("status update finished (%.3f seconds)", $cptime));
        }

        $cycle++;

        my $mem = PVE::ProcFSTools::read_memory_usage();

        # re-exec the daemon once its resident size exceeds 35*1024*1024
        if ($mem->{resident} > (35*1024*1024)) {
            syslog ('info', "restarting server after $cycle cycles to " .
                    "reduce memory usage (free $mem->{resident} bytes)");
            restart_server ();
        }

        # sleep in one second steps so that a HUP ($reload_config)
        # ends the wait early
        my $wcount = 0;
        while ((time() < $next_update) &&
               ($wcount < $updatetime) && # protect against time wrap
               !$reload_config) { $wcount++; sleep (1); };
    };

    my $err = $@;

    if ($err) {
        # error text goes in as an argument, not as the format
        syslog ('err', "ERROR: %s", $err);
        restart_server(5);
        exit (0);
    }
}

exit (0);
325 | ||
326 | __END__ | |
327 | ||
328 | =head1 NAME | |
329 | ||
330 | pvestatd - PVE Status Daemon | |
331 | ||
332 | =head1 SYNOPSIS | |
333 | ||
334 | pvestatd | |
335 | ||
336 | =head1 DESCRIPTION | |
337 | ||
338 | Documentation is available at www.proxmox.com | |
339 | ||
340 | ||
341 | ||
342 | ||
343 |