]> git.proxmox.com Git - pve-common.git/blob - data/PVE/Daemon.pm
Daemon: implement pre-fork server (max_workers property)
[pve-common.git] / data / PVE / Daemon.pm
1 package PVE::Daemon;
2
3 # Abstract class to implement Daemons
4 #
5 # Features:
6 # * lock and write PID file /var/run/$name.pid to make sure only
7 # one instance is running.
8 # * correctly daemonize (redirect STDIN/STDOUT)
9 # * restart by stop/start, exec, or signal HUP
10 # * daemon restart on error (option 'restart_on_error')
11 # * handle worker processes (option 'max_workers')
12
13 use strict;
14 use warnings;
15 use PVE::SafeSyslog;
16 use PVE::INotify;
17
18 use POSIX ":sys_wait_h";
19 use Fcntl ':flock';
20 use Getopt::Long;
21 use Time::HiRes qw (gettimeofday);
22
23 use base qw(PVE::CLIHandler);
24
# Global warning hook: every Perl warning is echoed on STDOUT and sent to
# syslog with priority 'warning'. The value of $@ is saved on entry and put
# back on exit so that reporting a warning does not disturb a pending error.
$SIG{'__WARN__'} = sub {
    my $saved_error = $@;

    my $text = $_[0];
    chomp $text;

    print "$text\n";
    syslog('warning', "WARNING: %s", $text);

    $@ = $saved_error;
};

# Fixed command search path for the daemon process.
$ENV{'PATH'} = '/sbin:/bin:/usr/sbin:/usr/bin';

# Set by new(); a second PVE::Daemon object in the same process is refused.
my $daemon_initialized = 0; # we only allow one instance
37
# Take an exclusive, non-blocking lock on "<pidfile>.lock".
#
# The lock file is opened in append mode on the package-level handle FLCK.
# On success the handle is deliberately left open (worker children close it
# again after fork). On failure the problem is written to syslog and the
# closure dies with "ERROR: ...".
#
# Parameters:
#   $self - daemon object; only $self->{pidfile} is read
my $lockpidfile = sub {
    my ($self) = @_;

    my $lkfn = $self->{pidfile} . ".lock";

    # three-argument open: the file name is never interpreted as a mode
    if (!open(FLCK, '>>', $lkfn)) {
        my $msg = "can't aquire lock on file '$lkfn' - $!";
        # pass the text as an argument - it may contain '%' characters
        syslog('err', "%s", $msg);
        die "ERROR: $msg\n";
    }

    if (!flock(FLCK, LOCK_EX|LOCK_NB)) {
        # build the message first: close() may overwrite $!
        my $msg = "can't aquire lock '$lkfn' - $!";
        close(FLCK);
        syslog('err', "%s", $msg);
        die "ERROR: $msg\n";
    }
};
56
# Write the current process id ($$) followed by a newline to
# $self->{pidfile}, replacing any previous content.
#
# Every step (open, write, close) is checked; a failure is logged to syslog
# and raised as "ERROR: ..." via die.
#
# Parameters:
#   $self - daemon object; only $self->{pidfile} is read
my $writepidfile = sub {
    my ($self) = @_;

    my $pidfile = $self->{pidfile};

    # common failure path; $! still holds the error of the failed call
    my $fail = sub {
        my ($action) = @_;
        my $msg = "can't $action pid file '$pidfile' - $!";
        # the text is passed as an argument, never as the format string
        syslog('err', "%s", $msg);
        die "ERROR: $msg\n";
    };

    # lexical handle and three-argument open
    open(my $fh, '>', $pidfile) or $fail->('open');

    print {$fh} "$$\n" or $fail->('write');

    # buffered write errors only show up when the handle is closed
    close($fh) or $fail->('close');
};
70
# Remove the lock file ("<pidfile>.lock") and then the pid file itself.
# The results of the unlink calls are not checked.
my $server_cleanup = sub {
    my ($self) = @_;

    my $pidfile = $self->{pidfile};

    unlink "$pidfile.lock";
    unlink $pidfile;
};
77
# Collect workers that have already exited.
#
# Each pid recorded in $self->{workers} is polled with a non-blocking
# waitpid; a pid that was reaped is dropped from the table and logged.
my $finish_workers = sub {
    my ($self) = @_;

    for my $child (keys %{$self->{workers}}) {
        my $reaped = waitpid($child, WNOHANG);

        # skip anything that is not reported as finished
        next if !defined($reaped) || ($reaped != $child);

        delete $self->{workers}->{$child};
        syslog('info', "worker $child finished");
    }
};
89
# Fork worker processes until $self->{max_workers} of them are registered in
# $self->{workers}. Does nothing once $self->{terminate} is set.
#
# Parent: records each new pid in $self->{workers}. A failed fork is logged
# (including the system error) and retried after one second.
#
# Child: renames itself to "<name> worker", closes the inherited lock handle
# and the inotify descriptor, clears the signal handlers, calls
# $self->run() and finally exits with status 0. An exception from run() is
# logged and followed by a 5 second pause before the worker exits.
my $start_workers = sub {
    my ($self) = @_;

    return if $self->{terminate};

    my $count = scalar(keys %{$self->{workers}});

    my $need = $self->{max_workers} - $count;

    return if $need <= 0;

    syslog('info', "starting $need worker(s)");

    while ($need > 0) {
        my $pid = fork;

        if (!defined($pid)) {
            # report why the fork failed, then retry
            syslog('err', "can't fork worker - %s", "$!");
            sleep(1);
        } elsif ($pid) { # parent
            $self->{workers}->{$pid} = 1;
            syslog('info', "worker $pid started");
            $need--;
        } else { # worker
            $0 = "$self->{name} worker";

            close(FLCK);

            PVE::INotify::inotify_close();

            for my $sig (qw(CHLD HUP INT TERM QUIT)) {
                $SIG{$sig} = 'DEFAULT'; # restore default handler
                # AnyEvent signals only works if $SIG{XX} is
                # undefined (perl event loop)
                delete $SIG{$sig}; # so that we can handle events with AnyEvent
            }

            eval { $self->run(); };
            if (my $err = $@) {
                # error text is data - never use it as the format string
                syslog('err', "%s", $err);
                sleep(5); # avoid fast restarts
            }

            syslog('info', "worker exit");
            exit(0);
        }
    }
};
141
# Stop the server and, when running in pre-fork mode, all of its workers.
#
# Steps:
#   1. set $self->{terminate} so that no new workers are started
#   2. call $self->shutdown() (errors are turned into warnings)
#   3. without max_workers: done
#   4. send TERM to every registered worker and wait up to 10 seconds for
#      children to exit
#   5. on timeout (or any other error while waiting) send KILL to the
#      workers that still exist and reap them, so that no zombie is left
#      behind when the daemon re-executes itself afterwards
#
# A previously scheduled alarm is re-armed with its remaining time once the
# wait is over.
my $terminate_server = sub {
    my ($self) = @_;

    $self->{terminate} = 1; # set flag to avoid worker restart

    eval { $self->shutdown(); };
    warn $@ if $@;

    # no workers - nothing else to do
    return if !$self->{max_workers};

    # we have workers - terminate them

    foreach my $cpid (keys %{$self->{workers}}) {
        kill(15, $cpid); # TERM childs
    }

    # nicely shutdown childs (give them max 10 seconds to shut down)
    my $previous_alarm = alarm(10);
    eval {
        local $SIG{ALRM} = sub { die "timeout\n" };

        while ((my $pid = waitpid(-1, 0)) > 0) {
            if (defined($self->{workers}->{$pid})) {
                delete($self->{workers}->{$pid});
                syslog('info', "worker $pid finished");
            }
        }
        alarm(0); # avoid race condition
    };
    my $err = $@;

    alarm($previous_alarm);

    if ($err) {
        syslog('err', "error stopping workers (will kill them now) - %s", $err);
        foreach my $cpid (keys %{$self->{workers}}) {
            # KILL childs still alive!
            if (kill(0, $cpid)) {
                delete($self->{workers}->{$cpid});
                syslog("err", "kill worker $cpid");
                kill(9, $cpid);
                # reap the killed child; returns -1 right away when it was
                # already collected elsewhere
                waitpid($cpid, 0);
            }
        }
    }
};
192
# Main entry of the daemon process (used by start()).
#
# Parameters:
#   $self  - daemon object
#   $debug - true: stay in the foreground and keep STDIN/STDOUT
#
# Sequence:
#   * take the pid-file lock and call $self->init()
#   * unless $debug: redirect STDIN/STDOUT to /dev/null
#   * unless $debug or re-executed (RESTART_PVE_DAEMON set): fork, the
#     parent exits, the child continues
#   * write the pid file, start a new session, redirect STDERR to STDOUT
#   * install handlers for TERM/QUIT/INT (terminate + cleanup) and HUP
#     (worker mode: terminate and flag a restart; otherwise call hup() if
#     the subclass defines it)
#   * worker mode: keep max_workers children alive until terminated;
#     otherwise call $self->run()
#   * on error: restart after restart_on_error seconds, or exit with -1
#   * otherwise: restart after HUP, or exit with 0
my $server_run = sub {
    my ($self, $debug) = @_;

    &$lockpidfile($self);

    # run in background
    my $spid;

    my $restart = $ENV{RESTART_PVE_DAEMON};

    delete $ENV{RESTART_PVE_DAEMON};

    $self->{debug} = 1 if $debug;

    $self->init();

    if (!$debug) {
        # 'or' (not '||') so that a failed open really triggers the die
        open(STDIN, '<', '/dev/null') or die "can't read /dev/null";
        open(STDOUT, '>', '/dev/null') or die "can't write /dev/null";
    }

    if (!$restart && !$debug) {
        PVE::INotify::inotify_close();
        $spid = fork();
        if (!defined($spid)) {
            my $msg = "can't put server into background - fork failed";
            syslog('err', "%s", $msg);
            die "ERROR: $msg\n";
        } elsif ($spid) { # parent
            exit(0);
        }
        PVE::INotify::inotify_init();
    }

    &$writepidfile($self);

    POSIX::setsid();

    if ($restart) {
        syslog('info' , "restarting server");
    } else {
        syslog('info' , "starting server");
    }

    open(STDERR, '>&STDOUT') or die "can't close STDERR\n";

    # A previous handler is only chained when it is a code reference;
    # $SIG{...} may also hold strings such as 'IGNORE' or 'DEFAULT', which
    # cannot be called.
    my $old_sig_term = $SIG{TERM};
    local $SIG{TERM} = sub {
        local ($@, $!, $?); # do not overwrite error vars
        syslog('info', "received signal TERM");
        &$terminate_server($self);
        &$server_cleanup($self);
        &$old_sig_term(@_) if ref($old_sig_term) eq 'CODE';
    };

    my $old_sig_quit = $SIG{QUIT};
    local $SIG{QUIT} = sub {
        local ($@, $!, $?); # do not overwrite error vars
        syslog('info', "received signal QUIT");
        &$terminate_server($self);
        &$server_cleanup($self);
        &$old_sig_quit(@_) if ref($old_sig_quit) eq 'CODE';
    };

    my $old_sig_int = $SIG{INT};
    local $SIG{INT} = sub {
        local ($@, $!, $?); # do not overwrite error vars
        syslog('info', "received signal INT");
        $SIG{INT} = 'DEFAULT'; # allow to terminate now
        &$terminate_server($self);
        &$server_cleanup($self);
        &$old_sig_int(@_) if ref($old_sig_int) eq 'CODE';
    };

    $SIG{HUP} = sub {
        local ($@, $!, $?); # do not overwrite error vars
        syslog('info', "received signal HUP");
        if ($self->{max_workers}) {
            &$terminate_server($self);
            $self->{got_hup_signal} = 1;
        } elsif ($self->can('hup')) {
            eval { $self->hup() };
            warn $@ if $@;
        }
    };

    eval {
        if ($self->{max_workers}) {
            my $old_sig_chld = $SIG{CHLD};
            local $SIG{CHLD} = sub {
                local ($@, $!, $?); # do not overwrite error vars
                &$finish_workers($self);
                &$old_sig_chld(@_) if ref($old_sig_chld) eq 'CODE';
            };

            for (;;) { # forever
                &$start_workers($self);
                sleep(5);
                &$finish_workers($self);
                last if $self->{terminate};
            }

        } else {
            $self->run();
        }
    };
    my $err = $@;

    if ($err) {
        # error text is data - never use it as the format string
        syslog('err', "ERROR: %s", $err);

        # fixme: kill all workers

        if (my $wait_time = $self->{restart_on_error}) {
            $self->restart_daemon($wait_time);
        } else {
            $self->exit_daemon(-1);
        }
    }

    if ($self->{got_hup_signal}) {
        $self->restart_daemon();
    } else {
        $self->exit_daemon(0);
    }
};
319
# Constructor.
#
# Parameters:
#   $this    - class name or object
#   $name    - daemon name; used for syslog, the pid file name and $0
#   $cmdline - array ref with the command line used to re-execute the
#              daemon on restart
#   %params  - optional settings: restart_on_error, stop_wait_time,
#              run_dir (default '/var/run'), max_workers
#
# Dies when not running as root (unless RESTART_PVE_DAEMON is set), when
# $name is missing, when a daemon object was already created in this
# process, or when an unknown option is passed.
sub new {
    my ($this, $name, $cmdline, %params) = @_;

    die "please run as root\n" if !$ENV{RESTART_PVE_DAEMON} && ($> != 0);

    die "missing name" if !$name;

    die "can't create more that one PVE::Daemon" if $daemon_initialized;
    $daemon_initialized = 1;

    PVE::INotify::inotify_init();

    initlog($name);

    my $class = ref($this) || $this;

    my $self = bless {
        name => $name,
        run_dir => '/var/run',
        workers => {},
    }, $class;

    my %known_option = map { $_ => 1 }
        qw(restart_on_error stop_wait_time run_dir max_workers);

    foreach my $opt (keys %params) {
        die "unknown option '$opt'" if !$known_option{$opt};
        $self->{$opt} = $params{$opt};
    }

    $self->{pidfile} = "$self->{run_dir}/${name}.pid";

    $self->{nodename} = PVE::INotify::nodename();

    $self->{cmdline} = [];

    foreach my $el (@$cmdline) {
        # untaint; \A...\z with /s matches every string, so the capture is
        # always the complete argument and never a stale value from an
        # earlier match
        my ($arg) = $el =~ m/\A(.*)\z/s;
        push @{$self->{cmdline}}, $arg;
    }

    $0 = $name;

    return $self;
}
372
# Log "server stopped", remove the pid and lock files and leave the process
# with the given exit status. Does not return.
sub exit_daemon {
    my ($self, $status) = @_;

    syslog("info", "server stopped");

    $server_cleanup->($self);

    exit($status);
}
382
# Restart the daemon by re-executing the stored command line.
#
# RESTART_PVE_DAEMON is set in the environment before the exec. An optional
# $waittime (seconds) delays the restart. The inotify descriptor is closed
# before the exec.
#
# Does not return: if exec fails, the failure is logged and the process
# exits with status -1.
sub restart_daemon {
    my ($self, $waittime) = @_;

    syslog('info', "server shutdown (restart)");

    $ENV{RESTART_PVE_DAEMON} = 1;

    sleep($waittime) if $waittime; # avoid high server load due to restarts

    PVE::INotify::inotify_close();

    # exec only returns on failure - report it instead of exiting silently
    exec(@{$self->{cmdline}})
        or syslog('err', "can't restart server - exec failed - %s", "$!");

    exit(-1);
}
398
# Initialization hook, meant to be overridden by subclasses.
# It is invoked once at startup, before the daemon forks into the
# background. The default implementation does nothing.
sub init {
    my ($self) = @_;
}
405
# Shutdown hook, meant to be overridden by subclasses.
# The default logs "server closing" and, when the daemon does not use
# max_workers, collects all child processes that have already exited
# (non-blocking).
sub shutdown {
    my ($self) = @_;

    syslog('info' , "server closing");

    if (!$self->{max_workers}) {
        # wait for children
        while (waitpid(-1, POSIX::WNOHANG()) > 0) {
            # nothing to do - just reap
        }
    }
}
417
418 # please define in subclass
419 #sub hup {
420 # my ($self) = @_;
421 #
422 # syslog('info' , "received signal HUP (restart)");
423 #}
424
# Main loop, meant to be overridden by subclasses.
# The default never returns: it logs "server is running" every 5 seconds.
sub run {
    my ($self) = @_;

    while (1) {
        syslog('info' , "server is running");
        sleep(5);
    }
}
434
# Start the daemon. With a true $debug the server stays in the foreground.
sub start {
    my ($self, $debug) = @_;

    $server_run->($self, $debug);
}
440
# Read the process id stored in $self->{pidfile}.
# Returns the pid as an integer, or 0 when the first line of the file is
# empty/false or does not consist of digits only.
my $read_pid = sub {
    my ($self) = @_;

    my $first_line = PVE::Tools::file_read_firstline($self->{pidfile});

    return 0 if !$first_line;

    # the capture also untaints the value
    if ($first_line =~ m/^(\d+)$/) {
        my $pid = int($1);
        return $pid;
    }

    return 0;
};
454
# Check whether the daemon recorded in the pid file is running.
# Scalar context: returns 1 or 0.
# List context: returns (running flag, pid); (0, 0) when no pid is known.
sub running {
    my ($self) = @_;

    my $pid = $read_pid->($self);

    # no usable pid file
    return wantarray ? (0, 0) : 0 if !$pid;

    my $alive = 0;
    $alive = 1 if PVE::ProcFSTools::check_process_running($pid);

    return wantarray ? ($alive, $pid) : $alive;
}
467
# Stop the daemon recorded in the pid file.
#
# Sends TERM and polls once per second, for up to $self->{stop_wait_time}
# seconds (default 5), until the process is gone. KILL is only sent when
# the process is still running after that period - a pid that has already
# exited is never signalled again, so an unrelated process that re-used the
# pid cannot be hit. Finally, if the pid file still exists, the lock is
# taken and the pid and lock files are removed.
#
# Returns early (doing nothing) when no pid can be read.
sub stop {
    my ($self) = @_;

    my $pid = &$read_pid($self);

    return if !$pid;

    if (PVE::ProcFSTools::check_process_running($pid)) {
        kill(15, $pid); # send TERM signal
        # give some time
        my $wait_time = $self->{stop_wait_time} || 5;
        my $running = 1;
        for (my $i = 0; $i < $wait_time; $i++) {
            $running = PVE::ProcFSTools::check_process_running($pid);
            last if !$running;
            sleep(1);
        }

        # the process may have exited during the last sleep - check again
        $running = PVE::ProcFSTools::check_process_running($pid) if $running;

        if ($running) {
            syslog('err', "server still running - send KILL");
            kill(9, $pid);
            # NOTE(review): only has an effect if $pid is a child of this
            # process; otherwise waitpid returns immediately
            waitpid($pid, 0);
        }
    }

    if (-f $self->{pidfile}) {
        # try to get the lock
        &$lockpidfile($self);
        &$server_cleanup($self);
    }
}
499
# Register a 'start' method (POST /start) on $class.
#
# Parameters:
#   $self        - daemon object the command operates on
#   $class       - class providing register_method()
#   $description - optional description, default "Start the daemon."
#
# The registered command accepts an optional boolean 'debug' parameter and
# forwards it to $self->start(); it returns undef.
sub register_start_command {
    my ($self, $class, $description) = @_;

    my $debug_property = {
        description => "Debug mode - stay in foreground",
        type => "boolean",
        optional => 1,
        default => 0,
    };

    my $handler = sub {
        my ($param) = @_;

        $self->start($param->{debug});

        return undef;
    };

    $class->register_method({
        name => 'start',
        path => 'start',
        method => 'POST',
        description => $description || "Start the daemon.",
        parameters => {
            additionalProperties => 0,
            properties => {
                debug => $debug_property,
            },
        },
        returns => { type => 'null' },
        code => $handler,
    });
}
529
# Shared implementation of the 'restart' and 'reload' commands.
#
#   * RESTART_PVE_DAEMON set in the environment (the variable is set by
#     restart_daemon before it re-executes the command line): start
#   * daemon not running: start
#   * daemon running and $use_hup true: send HUP to the running process
#   * daemon running otherwise: stop, then start
my $reload_daemon = sub {
    my ($self, $use_hup) = @_;

    if ($ENV{RESTART_PVE_DAEMON}) {
        $self->start();
        return;
    }

    my ($running, $pid) = $self->running();

    if (!$running) {
        $self->start();
    } elsif ($use_hup) {
        syslog('info', "send HUP to $pid");
        kill 1, $pid;
    } else {
        $self->stop();
        $self->start();
    }
};
550
# Register a 'restart' method (POST /restart) on $class.
#
# Parameters:
#   $self        - daemon object the command operates on
#   $class       - class providing register_method()
#   $use_hup     - true: restart a running daemon by sending HUP instead of
#                  stop/start
#   $description - optional description text
#
# The registered command takes no parameters and returns undef.
sub register_restart_command {
    my ($self, $class, $use_hup, $description) = @_;

    my $handler = sub {
        my ($param) = @_;

        $reload_daemon->($self, $use_hup);

        return undef;
    };

    $class->register_method({
        name => 'restart',
        path => 'restart',
        method => 'POST',
        description => $description || "Restart the daemon (or start if not running).",
        parameters => {
            additionalProperties => 0,
            properties => {},
        },
        returns => { type => 'null' },
        code => $handler,
    });
}
573
# Register a 'reload' method (POST /reload) on $class.
#
# Parameters:
#   $self        - daemon object the command operates on
#   $class       - class providing register_method()
#   $description - optional description text
#
# The registered command takes no parameters, always uses the HUP variant
# of the reload logic and returns undef.
sub register_reload_command {
    my ($self, $class, $description) = @_;

    my $handler = sub {
        my ($param) = @_;

        $reload_daemon->($self, 1);

        return undef;
    };

    $class->register_method({
        name => 'reload',
        path => 'reload',
        method => 'POST',
        description => $description || "Reload daemon configuration (or start if not running).",
        parameters => {
            additionalProperties => 0,
            properties => {},
        },
        returns => { type => 'null' },
        code => $handler,
    });
}
596
# Register a 'stop' method (POST /stop) on $class.
#
# Parameters:
#   $self        - daemon object the command operates on
#   $class       - class providing register_method()
#   $description - optional description, default "Stop the daemon."
#
# The registered command takes no parameters, calls $self->stop() and
# returns undef.
sub register_stop_command {
    my ($self, $class, $description) = @_;

    my $handler = sub {
        my ($param) = @_;

        $self->stop();

        return undef;
    };

    $class->register_method({
        name => 'stop',
        path => 'stop',
        method => 'POST',
        description => $description || "Stop the daemon.",
        parameters => {
            additionalProperties => 0,
            properties => {},
        },
        returns => { type => 'null' },
        code => $handler,
    });
}
619
# Register a 'status' method (GET /status) on $class.
#
# Parameters:
#   $self        - daemon object the command operates on
#   $class       - class providing register_method()
#   $description - optional description, default "Get daemon status."
#
# The registered command takes no parameters and returns the string
# 'running' or 'stopped', depending on $self->running().
sub register_status_command {
    my ($self, $class, $description) = @_;

    $class->register_method({
        name => 'status',
        path => 'status',
        method => 'GET',
        # honour the caller supplied text, like the other register_* methods
        description => $description || "Get daemon status.",
        parameters => {
            additionalProperties => 0,
            properties => {},
        },
        returns => {
            type => 'string',
            enum => ['stopped', 'running'],
        },
        code => sub {
            my ($param) = @_;

            return $self->running() ? 'running' : 'stopped';
        }});
}
642
643 1;
644