]> git.proxmox.com Git - pve-common.git/blob - src/PVE/Daemon.pm
daemon: add missing parenthesis around list
[pve-common.git] / src / PVE / Daemon.pm
1 package PVE::Daemon;
2
3 # Abstract class to implement Daemons
4 #
5 # Features:
6 # * lock and write PID file /var/run/$name.pid to make sure only
7 # one instance is running.
8 # * keep lock open during restart
9 # * correctly daemonize (redirect STDIN/STDOUT)
10 # * restart by stop/start, exec, or signal HUP
11 # * daemon restart on error (option 'restart_on_error')
12 # * handle worker processes (option 'max_workers')
13 # * allow to restart while workers are still running
14 # (option 'leave_children_open_on_reload')
15 # * run as different user using setuid/setgid
16
17 use strict;
18 use warnings;
19 use English;
20
21 use PVE::SafeSyslog;
22 use PVE::INotify;
23
24 use POSIX ":sys_wait_h";
25 use Fcntl ':flock';
26 use Socket qw(IPPROTO_TCP TCP_NODELAY SOMAXCONN);
27 use IO::Socket::IP;
28
29 use Getopt::Long;
30 use Time::HiRes qw (gettimeofday);
31
32 use base qw(PVE::CLIHandler);
33
# Fixed command search path for everything this process executes.
$ENV{PATH} = '/sbin:/bin:/usr/sbin:/usr/bin';

# Set to 1 by setup(); a second setup() call in the same process dies.
my $daemon_initialized = 0;

# Sockets handed out by create_reusable_socket(); restart_daemon() clears
# their close-on-exec flag so they stay open across the exec.
my $daemon_sockets = [];
38
# Close the daemon lock file handle stored in $self->{daemon_lock_fh} and
# remove the entry. Does nothing when no (true) handle is stored.
my $close_daemon_lock = sub {
    my ($self) = @_;

    my $lock_fh = $self->{daemon_lock_fh};
    return if !$lock_fh;

    close $lock_fh;
    delete $self->{daemon_lock_fh};
};
47
# Report an error message on STDERR and in the syslog (priority 'err').
# A trailing newline on the message is stripped first.
my $log_err = sub {
    my $text = shift;

    chomp $text;

    print STDERR "$text\n";
    # pass the text as an argument, never as the format string
    syslog('err', "%s", $text);
};
54
# Call this if you fork() from a child.
# Note: this is already called for workers, so it is only required if you
# fork inside a simple daemon (max_workers == 0).
#
# Closes the daemon lock handle, closes inotify, and resets the signal
# handlers for CHLD, HUP, INT, TERM and QUIT.
sub after_fork_cleanup {
    my ($self) = @_;

    $close_daemon_lock->($self);

    PVE::INotify::inotify_close();

    foreach my $signame ('CHLD', 'HUP', 'INT', 'TERM', 'QUIT') {
        # restore the default handler first ...
        $SIG{$signame} = 'DEFAULT';
        # ... then remove the entry: AnyEvent signals only work if
        # $SIG{XX} is undefined (perl event loop)
        delete $SIG{$signame};
    }
}
72
# Acquire the exclusive daemon lock on "<pidfile>.lock".
#
# If $self->{env_pve_lock_fd} is set, that file descriptor is re-opened and
# a single non-blocking lock attempt is made. Otherwise the lock file is
# opened in append mode and the lock is retried once per second for up to
# 5 seconds, followed by one last attempt.
#
# On success the handle is kept in $self->{daemon_lock_fh}.
# Dies if the lock file cannot be opened or the lock cannot be acquired.
my $lockpidfile = sub {
    my ($self) = @_;

    my $lkfn = $self->{pidfile} . ".lock";

    my $waittime = 0;

    if (my $fd = $self->{env_pve_lock_fd}) {
        $self->{daemon_lock_fh} = IO::Handle->new_from_fd($fd, "a");
    } else {
        $waittime = 5;
        # pass the mode separately so that special characters in the
        # file name are never interpreted as part of the open mode
        $self->{daemon_lock_fh} = IO::File->new($lkfn, '>>');
    }

    if (!$self->{daemon_lock_fh}) {
        die "can't open lock '$lkfn' - $!\n";
    }

    for (my $i = 0; $i < $waittime; $i++) {
        return if flock($self->{daemon_lock_fh}, LOCK_EX|LOCK_NB);
        sleep(1);
    }

    if (!flock($self->{daemon_lock_fh}, LOCK_EX|LOCK_NB)) {
        # save the flock error first - closing the handle may overwrite $!
        my $err = $!;

        $close_daemon_lock->($self);

        my ($running, $pid) = $self->running();
        if ($running) {
            die "can't aquire lock '$lkfn' - daemon already started (pid = $pid)\n";
        } else {
            die "can't aquire lock '$lkfn' - $err\n";
        }
    }
};
111
# Write the PID of the current process (followed by a newline) to
# $self->{pidfile}, replacing any previous content.
# Dies if the file cannot be opened or written.
my $writepidfile = sub {
    my ($self) = @_;

    my $pidfile = $self->{pidfile};

    # lexical handle + 3-arg open: no global bareword handle, and the file
    # name can never be mistaken for part of the open mode
    open(my $fh, '>', $pidfile)
        or die "can't open pid file '$pidfile' - $!\n";

    print {$fh} "$$\n"
        or die "can't write pid file '$pidfile' - $!\n";

    # buffered write errors only show up at close time
    close($fh)
        or die "can't write pid file '$pidfile' - $!\n";
};
122
# Remove the lock file ("<pidfile>.lock") and the pid file itself.
# Failures to unlink are ignored.
my $server_cleanup = sub {
    my ($self) = @_;

    my $lockfile = $self->{pidfile} . ".lock";

    unlink $lockfile;
    unlink $self->{pidfile};
};
129
# Reap finished worker processes without blocking.
# Every pid recorded in $self->{workers} and $self->{old_workers} is polled
# with waitpid(WNOHANG); pids that have exited are removed from the
# corresponding table and logged.
my $finish_workers = sub {
    my ($self) = @_;

    for my $group (qw(workers old_workers)) {
        for my $cpid (keys %{$self->{$group}}) {
            my $reaped = waitpid($cpid, WNOHANG);

            # still running (or not reapable) - keep it in the table
            next if !defined($reaped) || $reaped != $cpid;

            delete $self->{$group}->{$cpid};
            syslog('info', "worker $cpid finished");
        }
    }
};
143
# Fork worker processes until $self->{max_workers} workers are recorded in
# $self->{workers}. Does nothing once $self->{terminate} is set.
#
# Parent: records each new pid in $self->{workers}. A failed fork is
# logged and retried after one second.
# Child: renames itself to "<name> worker", calls after_fork_cleanup() and
# then run(); when run() returns or dies, the child exits with status 0.
my $start_workers = sub {
    my ($self) = @_;

    return if $self->{terminate};

    my $count = scalar keys %{$self->{workers}};
    my $need = $self->{max_workers} - $count;

    return if $need <= 0;

    syslog('info', "starting $need worker(s)");

    while ($need > 0) {
        my $pid = fork;

        if (!defined ($pid)) {
            syslog('err', "can't fork worker");
            sleep (1);
        } elsif ($pid) { # parent
            $self->{workers}->{$pid} = 1;
            syslog('info', "worker $pid started");
            $need--;
        } else { # child
            $0 = "$self->{name} worker";

            $self->after_fork_cleanup();

            eval { $self->run(); };
            if (my $err = $@) {
                # log the error as data: the text may contain '%', which
                # must not be interpreted as a format directive
                syslog('err', "%s", $err);
                sleep(5); # avoid fast restarts
            }

            syslog('info', "worker exit");
            exit (0);
        }
    }
};
182
# Send TERM (signal 15) to every pid recorded in $self->{old_workers}.
my $terminate_old_workers = sub {
    my ($self) = @_;

    my @old_pids = keys %{$self->{old_workers}};

    # with an empty list kill sends no signal, so no check is needed
    kill 15, @old_pids;
};
189
# Shut the server down: set the 'terminate' flag, call shutdown() and,
# for daemons with workers, stop the worker processes.
#
# $allow_open_children: when true and the daemon was configured with
# 'leave_children_open_on_reload', the workers are left running.
#
# Otherwise TERM is sent to all current and old workers, and they get up
# to 10 seconds to exit. Workers still recorded after the timeout that are
# still alive are sent KILL.
my $terminate_server = sub {
    my ($self, $allow_open_children) = @_;

    $self->{terminate} = 1; # set flag to avoid worker restart

    eval { $self->shutdown(); };
    warn $@ if $@;

    return if !$self->{max_workers}; # if we have no workers we're done here

    # if configured, leave children running on HUP
    return if $allow_open_children && $self->{leave_children_open_on_reload};

    # else send TERM to all (old and current) child workers.
    # Both tables must be listed explicitly: dereferencing a hash slice
    # evaluates the slice in scalar context and would only yield the last
    # table (old_workers).
    kill 15, (keys %{$self->{workers}}, keys %{$self->{old_workers}});

    # nicely shutdown children (give them max 10 seconds to shut down)
    my $previous_alarm = alarm(10);
    eval {
        local $SIG{ALRM} = sub { die "timeout\n" };

        while ((my $pid = waitpid (-1, 0)) > 0) {
            foreach my $id (qw(workers old_workers)) {
                if (defined($self->{$id}->{$pid})) {
                    delete($self->{$id}->{$pid});
                    syslog('info', "worker $pid finished");
                }
            }
        }
        alarm(0); # avoid race condition
    };
    my $err = $@;

    alarm ($previous_alarm);

    if ($err) {
        syslog('err', "error stopping workers (will kill them now) - $err");
        foreach my $id (qw(workers old_workers)) {
            foreach my $cpid (keys %{$self->{$id}}) {
                # KILL children still alive!
                if (kill (0, $cpid)) {
                    delete($self->{$id}->{$cpid});
                    syslog("err", "kill worker $cpid");
                    kill(9, $cpid);
                    # fixme: waitpid?
                }
            }
        }
    }
};
240
# Prepare the process for running as daemon.
#
# * initializes syslog with the daemon name
# * moves RESTART_PVE_DAEMON and PVE_DAEMON_LOCK_FD from the environment
#   into $self (env_restart_pve_daemon / env_pve_lock_fd)
# * requires root unless this is a restart
# * initializes inotify
# * switches group and user if 'setgid' / 'setuid' are configured
# * on restart with workers, takes over the worker pids listed in
#   PVE_DAEMON_WORKER_PIDS as 'old_workers'
#
# Dies on any failure, and if called more than once per process.
sub setup {
    my ($self) = @_;

    initlog($self->{name});

    my $restart = $ENV{RESTART_PVE_DAEMON};
    delete $ENV{RESTART_PVE_DAEMON};
    $self->{env_restart_pve_daemon} = $restart;

    my $lockfd = $ENV{PVE_DAEMON_LOCK_FD};
    delete $ENV{PVE_DAEMON_LOCK_FD};
    if (defined($lockfd)) {
        die "unable to parse lock fd '$lockfd'\n"
            if $lockfd !~ m/^(\d+)$/;
        $lockfd = $1; # untaint
    }
    $self->{env_pve_lock_fd} = $lockfd;

    die "please run as root\n" if !$restart && ($> != 0);

    die "can't create more that one PVE::Daemon" if $daemon_initialized;
    $daemon_initialized = 1;

    PVE::INotify::inotify_init();

    if (my $gidstr = $self->{setgid}) {
        my $gid = getgrnam($gidstr) || die "getgrnam failed - $!\n";
        POSIX::setgid($gid) || die "setgid $gid failed - $!\n";
        $EGID = "$gid $gid"; # this calls setgroups
        # just to be sure
        die "detected strange gid\n" if !($GID eq "$gid $gid" && $EGID eq "$gid $gid");
    }

    if (my $uidstr = $self->{setuid}) {
        my $uid = getpwnam($uidstr) || die "getpwnam failed - $!\n";
        POSIX::setuid($uid) || die "setuid $uid failed - $!\n";
        # just to be sure
        die "detected strange uid\n" if !($UID == $uid && $EUID == $uid);
    }

    if ($restart && $self->{max_workers}) {
        if (my $wpids = $ENV{PVE_DAEMON_WORKER_PIDS}) {
            foreach my $wpid (split(':', $wpids)) {
                # check & untaint: these values are later passed to kill
                # and waitpid, so only accept plain positive pids
                # (a pid of 0 would address our whole process group)
                if ($wpid =~ m/^([1-9]\d*)$/) {
                    $self->{old_workers}->{$1} = 1;
                }
            }
        }
    }

    $self->{nodename} = PVE::INotify::nodename();
}
289
# Main entry of the running server: take the daemon lock, daemonize
# (unless $debug or restarting), install the signal handlers and then
# either supervise the workers (max_workers set) or call run() directly.
#
# $debug: stay in foreground, keep STDIN/STDOUT, set $self->{debug}.
#
# This sub does not return normally: it ends in exit_daemon() or
# restart_daemon(). It dies if the lock cannot be taken, if the standard
# streams cannot be redirected, or if forking into background fails.
my $server_run = sub {
    my ($self, $debug) = @_;

    # fixme: handle restart lockfd
    &$lockpidfile($self);

    # remove FD_CLOEXEC bit to reuse on exec
    $self->{daemon_lock_fh}->fcntl(Fcntl::F_SETFD(), 0);

    $ENV{PVE_DAEMON_LOCK_FD} = $self->{daemon_lock_fh}->fileno;

    # run in background
    my $spid;

    $self->{debug} = 1 if $debug;

    $self->init();

    if (!$debug) {
        # Note: low-precedence 'or' is required here - with '||' the die
        # would bind to the file name string and could never trigger.
        open(STDIN, '<', '/dev/null') or die "can't read /dev/null - $!\n";
        open(STDOUT, '>', '/dev/null') or die "can't write /dev/null - $!\n";
    }

    if (!$self->{env_restart_pve_daemon} && !$debug) {
        PVE::INotify::inotify_close();
        $spid = fork();
        if (!defined ($spid)) {
            die "can't put server into background - fork failed";
        } elsif ($spid) { # parent
            exit (0);
        }
        PVE::INotify::inotify_init();
    }

    if ($self->{env_restart_pve_daemon}) {
        syslog('info' , "restarting server");
    } else {
        &$writepidfile($self);
        syslog('info' , "starting server");
    }

    POSIX::setsid();

    open(STDERR, '>&', \*STDOUT) or die "can't redirect STDERR to STDOUT - $!\n";

    my $old_sig_term = $SIG{TERM};
    local $SIG{TERM} = sub {
        local ($@, $!, $?); # do not overwrite error vars
        syslog('info', "received signal TERM");
        &$terminate_server($self, 0);
        &$server_cleanup($self);
        &$old_sig_term(@_) if $old_sig_term;
    };

    my $old_sig_quit = $SIG{QUIT};
    local $SIG{QUIT} = sub {
        local ($@, $!, $?); # do not overwrite error vars
        syslog('info', "received signal QUIT");
        &$terminate_server($self, 0);
        &$server_cleanup($self);
        &$old_sig_quit(@_) if $old_sig_quit;
    };

    my $old_sig_int = $SIG{INT};
    local $SIG{INT} = sub {
        local ($@, $!, $?); # do not overwrite error vars
        syslog('info', "received signal INT");
        $SIG{INT} = 'DEFAULT'; # allow to terminate now
        &$terminate_server($self, 0);
        &$server_cleanup($self);
        &$old_sig_int(@_) if $old_sig_int;
    };

    $SIG{HUP} = sub {
        local ($@, $!, $?); # do not overwrite error vars
        syslog('info', "received signal HUP");
        $self->{got_hup_signal} = 1;
        if ($self->{max_workers}) {
            &$terminate_server($self, 1);
        } elsif ($self->can('hup')) {
            eval { $self->hup() };
            warn $@ if $@;
        }
    };

    eval {
        if ($self->{max_workers}) {
            my $old_sig_chld = $SIG{CHLD};
            local $SIG{CHLD} = sub {
                local ($@, $!, $?); # do not overwrite error vars
                &$finish_workers($self);
                &$old_sig_chld(@_) if $old_sig_chld;
            };

            # now loop forever (until we receive terminate signal)
            for (;;) {
                &$start_workers($self);
                sleep(5);
                &$terminate_old_workers($self);
                &$finish_workers($self);
                last if $self->{terminate};
            }

        } else {
            $self->run();
        }
    };
    my $err = $@;

    if ($err) {
        # log the error as data: the text may contain '%', which must not
        # be interpreted as a format directive
        syslog('err', "%s", "ERROR: $err");

        &$terminate_server($self, 1);

        if (my $wait_time = $self->{restart_on_error}) {
            $self->restart_daemon($wait_time);
        } else {
            $self->exit_daemon(-1);
        }
    }

    if ($self->{got_hup_signal}) {
        $self->restart_daemon();
    } else {
        $self->exit_daemon(0);
    }
};
417
# Constructor.
#
# $name:    daemon name (defaults to 'daemon'); also used for the default
#           pid file "/var/run/<name>.pid" and as process name ($0)
# $cmdline: array reference with the command line used to re-exec the
#           daemon on restart
# %params:  optional settings: restart_on_error, stop_wait_time, pidfile,
#           max_workers, leave_children_open_on_reload, setgid, setuid
#
# Any error (e.g. an unknown option) is logged and the process exits
# with status -1.
sub new {
    my ($this, $name, $cmdline, %params) = @_;

    $name ||= 'daemon'; # should not happen

    # the complete set of options accepted in %params
    my %known_option = map { $_ => 1 } qw(
        restart_on_error
        stop_wait_time
        pidfile
        max_workers
        leave_children_open_on_reload
        setgid
        setuid
    );

    my $self;

    eval {
        my $class = ref($this) || $this;

        $self = bless {
            name => $name,
            pidfile => "/var/run/${name}.pid",
            workers => {},
            old_workers => {},
        }, $class;

        foreach my $opt (keys %params) {
            die "unknown daemon option '$opt'\n" if !$known_option{$opt};
            $self->{$opt} = $params{$opt};
        }

        # untaint
        $self->{cmdline} = [map { /^(.*)$/ } @$cmdline];

        $0 = $name;
    };
    if (my $err = $@) {
        $log_err->($err);
        exit(-1);
    }

    return $self;
}
470
# Log the server stop, remove pid and lock file, then exit the process
# with the given status.
sub exit_daemon {
    my ($self, $status) = @_;

    syslog("info", "server stopped");

    $server_cleanup->($self);

    exit($status);
}
480
# Restart the daemon by re-executing its original command line.
#
# $waittime: optional number of seconds to sleep before the exec
#
# State is handed over to the new process image through the environment
# (RESTART_PVE_DAEMON, and PVE_DAEMON_WORKER_PIDS when workers are used).
# The close-on-exec flag is cleared on all reusable sockets so they stay
# open. Does not return: if the exec fails, the error is logged and the
# process exits with status -1.
sub restart_daemon {
    my ($self, $waittime) = @_;

    syslog('info', "server shutdown (restart)");

    $ENV{RESTART_PVE_DAEMON} = 1;

    foreach my $ds (@$daemon_sockets) {
        $ds->fcntl(Fcntl::F_SETFD(), 0);
    }

    if ($self->{max_workers}) {
        my @workers = (keys %{$self->{workers}}, keys %{$self->{old_workers}});
        $ENV{PVE_DAEMON_WORKER_PIDS} = join(':', @workers);
    }

    sleep($waittime) if $waittime; # avoid high server load due to restarts

    PVE::INotify::inotify_close();

    my @cmd = @{$self->{cmdline}};

    # exec only returns on failure - do not fail silently in that case
    exec (@cmd)
        or syslog('err', "%s", "restart failed - unable to exec '" . join(' ', @cmd) . "' - $!");

    exit (-1);
}
505
# Hook for subclasses - please overwrite.
# Called once at startup, before the server forks into background.
# The default implementation does nothing.
sub init {
    my ($self) = @_;
}
512
# Hook for subclasses - please overwrite.
# Default: log that the server is closing; a daemon without workers
# additionally reaps all children that have already exited.
sub shutdown {
    my ($self) = @_;

    syslog('info' , "server closing");

    return if $self->{max_workers};

    # collect exited children without blocking
    while (waitpid(-1, POSIX::WNOHANG()) > 0) {
        # nothing to do - just reap
    }
}
524
525 # please define in subclass
526 #sub hup {
527 # my ($self) = @_;
528 #
529 # syslog('info' , "received signal HUP (restart)");
530 #}
531
# Hook for subclasses - please overwrite.
# Default main loop: log a message every 5 seconds, forever.
sub run {
    my ($self) = @_;

    while (1) {
        syslog('info' , "server is running");
        sleep(5);
    }
}
541
# Set up and run the server.
# $debug: passed on to the server run code (stay in foreground).
# If setup or the server run dies, the error is logged and the process
# exits with status -1.
sub start {
    my ($self, $debug) = @_;

    eval {
        $self->setup();
        $server_run->($self, $debug);
    };
    my $err = $@;
    if ($err) {
        $log_err->("start failed - $err");
        exit(-1);
    }
}
554
# Read the pid from the first line of $self->{pidfile}.
# Returns the pid as integer, or 0 if the line is empty/missing or does
# not consist of digits only.
my $read_pid = sub {
    my ($self) = @_;

    my $first_line = PVE::Tools::file_read_firstline($self->{pidfile});

    return 0 if !$first_line;

    # the capture also untaints the value
    if ($first_line =~ m/^(\d+)$/) {
        return int($1);
    }

    return 0;
};
568
# Checks if the process was started by systemd: returns 1 if our parent
# process has pid 1, else 0.
my $init_ppid = sub {
    return getppid() == 1 ? 1 : 0;
};
578
# Check whether the daemon recorded in the pid file is running.
# Scalar context: returns 1 or 0.
# List context: returns (running, pid); (0, 0) if no pid is recorded.
sub running {
    my ($self) = @_;

    my $pid = $read_pid->($self);
    my $alive = 0;

    if ($pid) {
        $alive = PVE::ProcFSTools::check_process_running($pid) ? 1 : 0;
    } else {
        $pid = 0;
    }

    return wantarray ? ($alive, $pid) : $alive;
}
591
# Stop the daemon recorded in the pid file.
#
# Sends TERM, polls once per second for up to 'stop_wait_time' seconds
# (default 5) until the process is gone, then sends KILL in any case.
# Afterwards, if the pid file still exists, the lock is taken and pid and
# lock file are removed; a failure there is only logged.
sub stop {
    my ($self) = @_;

    my $pid = $read_pid->($self);

    return if !$pid;

    if (PVE::ProcFSTools::check_process_running($pid)) {
        kill(15, $pid); # send TERM signal

        # give some time
        my $wait_time = $self->{stop_wait_time} || 5;
        my $running = 1;
        my $elapsed = 0;
        while ($elapsed < $wait_time) {
            $running = PVE::ProcFSTools::check_process_running($pid);
            last if !$running;
            sleep (1);
            $elapsed++;
        }

        if ($running) {
            syslog('err', "server still running - send KILL");
        }

        # to be sure
        kill(9, $pid);
        waitpid($pid, 0);
    }

    if (-f $self->{pidfile}) {
        eval {
            # try to get the lock
            $lockpidfile->($self);
            $server_cleanup->($self);
        };
        if (my $err = $@) {
            $log_err->("cleanup failed - $err");
        }
    }
}
628
# Register the 'start' command on the class of $self.
# $description: optional text, defaults to "Start the daemon."
#
# The command starts the daemon directly when our parent is pid 1 or when
# debug mode is requested; otherwise it delegates to 'systemctl start'.
sub register_start_command {
    my ($self, $description) = @_;

    my $class = ref($self);

    my $parameters = {
        additionalProperties => 0,
        properties => {
            debug => {
                description => "Debug mode - stay in foreground",
                type => "boolean",
                optional => 1,
                default => 0,
            },
        },
    };

    my $handler = sub {
        my ($param) = @_;

        my $debug = $param->{debug};

        if ($init_ppid->() || $debug) {
            $self->start($debug);
        } else {
            PVE::Tools::run_command(['systemctl', 'start', $self->{name}]);
        }

        return undef;
    };

    $class->register_method({
        name => 'start',
        path => 'start',
        method => 'POST',
        description => $description || "Start the daemon.",
        parameters => $parameters,
        returns => { type => 'null' },
        code => $handler,
    });
}
664
# Reload or restart the daemon.
#
# * while being restarted (env_restart_pve_daemon set): just start
# * not running: start
# * running and $use_hup: send HUP (signal 1) to the running daemon
# * running otherwise: stop, then start
my $reload_daemon = sub {
    my ($self, $use_hup) = @_;

    if ($self->{env_restart_pve_daemon}) {
        $self->start();
    } else {
        my ($running, $pid) = $self->running();

        if (!$running) {
            $self->start();
        } elsif ($use_hup) {
            syslog('info', "send HUP to $pid");
            kill 1, $pid;
        } else {
            $self->stop();
            $self->start();
        }
    }
};
685
# Register the 'restart' command on the class of $self.
# $use_hup:     restart by sending HUP instead of stop/start
# $description: optional text
#
# When our parent is pid 1 the daemon is reloaded directly; otherwise the
# request is delegated to systemctl ('reload-or-restart' with $use_hup,
# else 'restart').
sub register_restart_command {
    my ($self, $use_hup, $description) = @_;

    my $class = ref($self);

    my $handler = sub {
        my ($param) = @_;

        if ($init_ppid->()) {
            $reload_daemon->($self, $use_hup);
        } else {
            my $action = $use_hup ? 'reload-or-restart' : 'restart';
            PVE::Tools::run_command(['systemctl', $action, $self->{name}]);
        }

        return undef;
    };

    $class->register_method({
        name => 'restart',
        path => 'restart',
        method => 'POST',
        description => $description || "Restart the daemon (or start if not running).",
        parameters => {
            additionalProperties => 0,
            properties => {},
        },
        returns => { type => 'null' },
        code => $handler,
    });
}
714
# Register the 'reload' command on the class of $self.
# $description: optional text
# The command always reloads through the HUP path of the reload helper.
sub register_reload_command {
    my ($self, $description) = @_;

    my $class = ref($self);

    my $handler = sub {
        my ($param) = @_;

        $reload_daemon->($self, 1);

        return undef;
    };

    $class->register_method({
        name => 'reload',
        path => 'reload',
        method => 'POST',
        description => $description || "Reload daemon configuration (or start if not running).",
        parameters => {
            additionalProperties => 0,
            properties => {},
        },
        returns => { type => 'null' },
        code => $handler,
    });
}
739
# Register the 'stop' command on the class of $self.
# $description: optional text, defaults to "Stop the daemon."
#
# When our parent is pid 1 the daemon is stopped directly; otherwise the
# request is delegated to 'systemctl stop'.
sub register_stop_command {
    my ($self, $description) = @_;

    my $class = ref($self);

    my $handler = sub {
        my ($param) = @_;

        if ($init_ppid->()) {
            $self->stop();
        } else {
            PVE::Tools::run_command(['systemctl', 'stop', $self->{name}]);
        }

        return undef;
    };

    $class->register_method({
        name => 'stop',
        path => 'stop',
        method => 'POST',
        description => $description || "Stop the daemon.",
        parameters => {
            additionalProperties => 0,
            properties => {},
        },
        returns => { type => 'null' },
        code => $handler,
    });
}
768
# Register the 'status' command on the class of $self.
# $description: optional text, defaults to "Get daemon status."
#
# The command returns the string 'running' or 'stopped', depending on
# the result of running().
sub register_status_command {
    my ($self, $description) = @_;

    my $class = ref($self);

    $class->register_method({
        name => 'status',
        path => 'status',
        method => 'GET',
        # honor the caller supplied text, like the other register_* methods
        description => $description || "Get daemon status.",
        parameters => {
            additionalProperties => 0,
            properties => {},
        },
        returns => {
            type => 'string',
            enum => ['stopped', 'running'],
        },
        code => sub {
            my ($param) = @_;

            return $self->running() ? 'running' : 'stopped';
        }});
}
793
# some useful helper

# Create a listening TCP socket that survives a daemon restart.
#
# $port:   local port (required)
# $host:   local address to bind to
# $family: address family passed to IO::Socket::IP
#
# On a restart (env_restart_pve_daemon set) the socket is re-opened from
# the file descriptor found in the environment variable
# PVE_DAEMON_SOCKET_<port>. Otherwise a new listening socket is created
# and its file descriptor is exported in that variable.
#
# The socket is remembered in the module wide socket list and returned.
# Dies if no port is given or the socket cannot be created/re-opened.
sub create_reusable_socket {
    my ($self, $port, $host, $family) = @_;

    die "no port specified" if !$port;

    my ($socket, $sockfd);

    if (defined($sockfd = $ENV{"PVE_DAEMON_SOCKET_$port"}) &&
        $self->{env_restart_pve_daemon}) {

        die "unable to parse socket fd '$sockfd'\n"
            if $sockfd !~ m/^(\d+)$/;
        $sockfd = $1; # untaint

        $socket = IO::Socket::IP->new;
        $socket->fdopen($sockfd, 'w') ||
            die "cannot fdopen file descriptor '$sockfd' - $!\n";

        $socket->fcntl(Fcntl::F_SETFD(), Fcntl::FD_CLOEXEC);
    } else {

        $socket = IO::Socket::IP->new(
            LocalAddr => $host,
            LocalPort => $port,
            Listen => SOMAXCONN,
            Family => $family,
            Proto => 'tcp',
            GetAddrInfoFlags => 0,
            ReuseAddr => 1) ||
            die "unable to create socket - $@\n";

        # we often observe delays when using Nagle algorithm,
        # so we disable that to maximize performance.
        # This is only an optimization, so a failure is logged, not fatal.
        setsockopt($socket, IPPROTO_TCP, TCP_NODELAY, 1)
            or syslog('warning', "%s", "unable to set TCP_NODELAY on port $port - $!");

        $ENV{"PVE_DAEMON_SOCKET_$port"} = $socket->fileno;
    }

    push @$daemon_sockets, $socket;

    return $socket;
}
838
839
840 1;
841