]> git.proxmox.com Git - pve-common.git/blob - data/PVE/Daemon.pm
264f8be5e2a840aa7969a47ef95fbda017d7cbc8
[pve-common.git] / data / PVE / Daemon.pm
1 package PVE::Daemon;
2
3 # Abstract class to implement Daemons
4 #
5 # Features:
6 # * lock and write PID file /var/run/$name.pid to make sure only
7 # one instance is running.
8 # * keep lock open during restart
9 # * correctly daemonize (redirect STDIN/STDOUT)
10 # * restart by stop/start, exec, or signal HUP
11 # * daemon restart on error (option 'restart_on_error')
12 # * handle worker processes (option 'max_workers')
13 # * allow to restart while workers are still running
14 # (option 'leave_children_open_on_reload')
15 # * run as different user using setuid/setgid
16
17 use strict;
18 use warnings;
19 use English;
20
21 use PVE::SafeSyslog;
22 use PVE::INotify;
23
24 use POSIX ":sys_wait_h";
25 use Fcntl ':flock';
26 use Socket qw(IPPROTO_TCP TCP_NODELAY SOMAXCONN);
27 use IO::Socket::INET;
28
29 use Getopt::Long;
30 use Time::HiRes qw (gettimeofday);
31
32 use base qw(PVE::CLIHandler);
33
# Reset PATH to a fixed list of system directories.
$ENV{PATH} = join(':', qw(/sbin /bin /usr/sbin /usr/bin));

# Set to 1 by new(); a second PVE::Daemon object is refused.
my $daemon_initialized = 0;
37
# Close the daemon lock file handle stored in $self (if any) and drop
# the reference to it.
my $close_daemon_lock = sub {
    my ($self) = @_;

    if (my $lock_fh = $self->{daemon_lock_fh}) {
        close $lock_fh;
        delete $self->{daemon_lock_fh};
    }
};
46
# Report an error message on STDERR and in the syslog (priority 'err').
# A trailing newline on the message is removed first.
my $log_err = sub {
    my ($text) = @_;

    chomp $text;

    print STDERR $text . "\n";
    syslog('err', "%s", $text);
};
53
# Call this if you fork() from a child.
# Note: this is already called for workers, so it is only required
# if you fork inside a simple daemon (max_workers == 0).
#
# Closes the daemon lock handle, closes inotify, and resets the
# CHLD/HUP/INT/TERM/QUIT signal handlers.
sub after_fork_cleanup {
    my ($self) = @_;

    $close_daemon_lock->($self);

    PVE::INotify::inotify_close();

    foreach my $signame ('CHLD', 'HUP', 'INT', 'TERM', 'QUIT') {
        # restore the default handler first ...
        $SIG{$signame} = 'DEFAULT';
        # ... then remove the entry: AnyEvent signals only work if
        # $SIG{XX} is undefined (perl event loop)
        delete $SIG{$signame};
    }
}
71
# Acquire the exclusive daemon lock ("<pidfile>.lock") and keep the handle
# in $self->{daemon_lock_fh}.
#
# If a lock file descriptor was handed over by a previous instance
# (env_pve_lock_fd), that descriptor is reused and the lock is tried exactly
# once. Otherwise the lock file is opened in append mode and locking is
# retried once per second for up to 5 seconds, plus one final attempt.
#
# Dies if the lock file cannot be opened or the lock cannot be obtained.
my $lockpidfile = sub {
    my ($self) = @_;

    my $lkfn = $self->{pidfile} . ".lock";

    my $waittime = 0;

    if (my $fd = $self->{env_pve_lock_fd}) {
        $self->{daemon_lock_fh} = IO::Handle->new_from_fd($fd, "a");
    } else {
        $waittime = 5;
        # pass the mode separately so that special characters in the file
        # name can never be interpreted as part of the open mode
        $self->{daemon_lock_fh} = IO::File->new($lkfn, '>>');
    }

    if (!$self->{daemon_lock_fh}) {
        die "can't open lock '$lkfn' - $!\n";
    }

    for (my $i = 0; $i < $waittime; $i++) {
        return if flock($self->{daemon_lock_fh}, LOCK_EX|LOCK_NB);
        sleep(1);
    }

    if (!flock($self->{daemon_lock_fh}, LOCK_EX|LOCK_NB)) {
        # save the flock error before close() gets a chance to overwrite $!
        my $err = $!;

        $close_daemon_lock->($self);

        my ($running, $pid) = $self->running();
        if ($running) {
            die "can't aquire lock '$lkfn' - daemon already started (pid = $pid)\n";
        } else {
            die "can't aquire lock '$lkfn' - $err\n";
        }
    }
};
110
# Write the PID of the current process (followed by a newline) to
# $self->{pidfile}, replacing any previous content.
#
# Dies if the file cannot be opened or written.
my $writepidfile = sub {
    my ($self) = @_;

    my $pidfile = $self->{pidfile};

    open(my $fh, '>', $pidfile)
        or die "can't open pid file '$pidfile' - $!\n";

    print {$fh} "$$\n"
        or die "can't write pid file '$pidfile' - $!\n";

    # buffered write errors only show up on close
    close($fh)
        or die "can't write pid file '$pidfile' - $!\n";
};
121
# Remove the lock file and the pid file of this daemon.
my $server_cleanup = sub {
    my ($self) = @_;

    my $lockfile = $self->{pidfile} . ".lock";
    unlink $lockfile;

    unlink $self->{pidfile};
};
128
# Reap (non-blocking) every known worker that has already exited and
# remove it from the 'workers' / 'old_workers' bookkeeping hashes.
my $finish_workers = sub {
    my ($self) = @_;

    for my $group ('workers', 'old_workers') {
        for my $cpid (keys %{$self->{$group}}) {
            my $reaped = waitpid($cpid, WNOHANG);

            # skip workers that did not exit yet
            next if !defined($reaped);
            next if $reaped != $cpid;

            delete $self->{$group}->{$cpid};
            syslog('info', "worker $cpid finished");
        }
    }
};
142
# Fork new worker processes until $self->{max_workers} workers are running.
#
# Does nothing once the 'terminate' flag is set. In the parent, each new
# worker pid is recorded in $self->{workers}. In the child, the inherited
# daemon state is cleaned up, $self->run() is executed, and the child then
# exits with status 0 (errors from run() are logged).
my $start_workers = sub {
    my ($self) = @_;

    return if $self->{terminate};

    my $count = scalar(keys %{$self->{workers}});

    my $need = $self->{max_workers} - $count;

    return if $need <= 0;

    syslog('info', "starting $need worker(s)");

    while ($need > 0) {
        my $pid = fork();

        if (!defined($pid)) {
            syslog('err', "can't fork worker");
            sleep(1); # retry after a short pause
        } elsif ($pid) { # parent
            $self->{workers}->{$pid} = 1;
            syslog('info', "worker $pid started");
            $need--;
        } else { # child
            $0 = "$self->{name} worker";

            $self->after_fork_cleanup();

            eval { $self->run(); };
            if (my $err = $@) {
                # never use the error text itself as format string
                syslog('err', "%s", $err);
                sleep(5); # avoid fast restarts
            }

            syslog('info', "worker exit");
            exit(0);
        }
    }
};
185
# Stop the server: set the 'terminate' flag, call $self->shutdown() and,
# if workers are used, send them TERM.
#
# With $allow_open_children set and 'leave_children_open_on_reload'
# configured, the sub returns right after signalling the current workers.
# Otherwise old workers get TERM as well, all children get up to 10 seconds
# to exit, and whatever is still alive after an error/timeout gets KILL.
my $terminate_server = sub {
    my ($self, $allow_open_children) = @_;

    $self->{terminate} = 1; # set flag to avoid worker restart

    eval { $self->shutdown(); };
    warn $@ if $@;

    # simple daemon without workers - nothing else to do
    return if !$self->{max_workers};

    # we have workers - send TERM signal
    for my $worker_pid (keys %{$self->{workers}}) {
        kill(15, $worker_pid);
    }

    # if configured, leave children running on HUP
    if ($allow_open_children && $self->{leave_children_open_on_reload}) {
        return;
    }

    # else, send TERM to old workers
    for my $worker_pid (keys %{$self->{old_workers}}) {
        kill(15, $worker_pid);
    }

    # nicely shutdown childs (give them max 10 seconds to shut down)
    my $previous_alarm = alarm(10);
    eval {
        local $SIG{ALRM} = sub { die "timeout\n" };

        for (;;) {
            my $pid = waitpid(-1, 0);
            last if !($pid > 0);

            for my $group ('workers', 'old_workers') {
                next if !defined($self->{$group}->{$pid});
                delete($self->{$group}->{$pid});
                syslog('info', "worker $pid finished");
            }
        }
        alarm(0); # avoid race condition
    };
    my $err = $@;

    alarm($previous_alarm);

    return if !$err;

    syslog('err', "error stopping workers (will kill them now) - $err");

    for my $group ('workers', 'old_workers') {
        for my $worker_pid (keys %{$self->{$group}}) {
            # KILL childs still alive!
            next if !kill(0, $worker_pid);

            delete($self->{$group}->{$worker_pid});
            syslog("err", "kill worker $worker_pid");
            kill(9, $worker_pid);
            # fixme: waitpid?
        }
    }
};
249
# Main entry point of the daemon process.
#
# Takes the daemon lock (exported via PVE_DAEMON_LOCK_FD so that it survives
# an exec), calls $self->init(), detaches from the terminal unless $debug is
# set or this is a restart, installs the TERM/QUIT/INT/HUP handlers and then
# either supervises the worker processes (max_workers set) or calls
# $self->run() directly.
#
# This sub does not return normally: it ends in exit_daemon() or
# restart_daemon(). Setup errors are raised with die.
my $server_run = sub {
    my ($self, $debug) = @_;

    # fixme: handle restart lockfd
    $lockpidfile->($self);

    # remove FD_CLOEXEC bit to reuse on exec
    $self->{daemon_lock_fh}->fcntl(Fcntl::F_SETFD(), 0);

    $ENV{PVE_DAEMON_LOCK_FD} = $self->{daemon_lock_fh}->fileno;

    # run in background
    my $spid;

    $self->{debug} = 1 if $debug;

    $self->init();

    if (!$debug) {
        # note: 'or' (not '||') so that die is tied to open() itself
        open(STDIN, '<', '/dev/null') or die "can't read /dev/null";
        open(STDOUT, '>', '/dev/null') or die "can't write /dev/null";
    }

    if (!$self->{env_restart_pve_daemon} && !$debug) {
        PVE::INotify::inotify_close();
        $spid = fork();
        if (!defined($spid)) {
            die "can't put server into background - fork failed";
        } elsif ($spid) { # parent
            exit(0);
        }
        PVE::INotify::inotify_init();
    }

    if ($self->{env_restart_pve_daemon}) {
        syslog('info' , "restarting server");
    } else {
        $writepidfile->($self);
        syslog('info' , "starting server");
    }

    POSIX::setsid();

    open(STDERR, '>&STDOUT') or die "can't close STDERR\n";

    # Previously installed handlers are chained, but only if they are real
    # code references - $SIG{XX} may also hold 'IGNORE' or 'DEFAULT'.
    my $old_sig_term = $SIG{TERM};
    local $SIG{TERM} = sub {
        local ($@, $!, $?); # do not overwrite error vars
        syslog('info', "received signal TERM");
        $terminate_server->($self, 0);
        $server_cleanup->($self);
        $old_sig_term->(@_) if ref($old_sig_term) eq 'CODE';
    };

    my $old_sig_quit = $SIG{QUIT};
    local $SIG{QUIT} = sub {
        local ($@, $!, $?); # do not overwrite error vars
        syslog('info', "received signal QUIT");
        $terminate_server->($self, 0);
        $server_cleanup->($self);
        $old_sig_quit->(@_) if ref($old_sig_quit) eq 'CODE';
    };

    my $old_sig_int = $SIG{INT};
    local $SIG{INT} = sub {
        local ($@, $!, $?); # do not overwrite error vars
        syslog('info', "received signal INT");
        $SIG{INT} = 'DEFAULT'; # allow to terminate now
        $terminate_server->($self, 0);
        $server_cleanup->($self);
        $old_sig_int->(@_) if ref($old_sig_int) eq 'CODE';
    };

    $SIG{HUP} = sub {
        local ($@, $!, $?); # do not overwrite error vars
        syslog('info', "received signal HUP");
        $self->{got_hup_signal} = 1;
        if ($self->{max_workers}) {
            $terminate_server->($self, 1);
        } elsif ($self->can('hup')) {
            eval { $self->hup() };
            warn $@ if $@;
        }
    };

    eval {
        if ($self->{max_workers}) {
            my $old_sig_chld = $SIG{CHLD};
            local $SIG{CHLD} = sub {
                local ($@, $!, $?); # do not overwrite error vars
                $finish_workers->($self);
                $old_sig_chld->(@_) if ref($old_sig_chld) eq 'CODE';
            };

            # catch worker finished during restart phase
            $finish_workers->($self);

            # now loop forever (until we receive terminate signal)
            for (;;) {
                $start_workers->($self);
                sleep(5);
                $finish_workers->($self);
                last if $self->{terminate};
            }

        } else {
            $self->run();
        }
    };
    my $err = $@;

    if ($err) {
        # never use the error text itself as format string
        syslog('err', "%s", "ERROR: $err");

        $terminate_server->($self, 1);

        if (my $wait_time = $self->{restart_on_error}) {
            $self->restart_daemon($wait_time);
        } else {
            $self->exit_daemon(-1);
        }
    }

    if ($self->{got_hup_signal}) {
        $self->restart_daemon();
    } else {
        $self->exit_daemon(0);
    }
};
379
# Constructor.
#
# Parameters:
#   $name    - daemon name; used for syslog, the process title and the
#              default pid file "/var/run/$name.pid"
#   $cmdline - array reference with the command line used to re-exec the
#              daemon on restart
#   %params  - options: restart_on_error, stop_wait_time, pidfile,
#              max_workers, leave_children_open_on_reload, setgid, setuid
#
# Only one PVE::Daemon object may be created per process. Any error during
# construction is logged and terminates the process with exit(-1).
sub new {
    my ($this, $name, $cmdline, %params) = @_;

    $name = 'daemon' if !$name; # should not happen

    initlog($name);

    my $self;

    eval {

        # state handed over by a previous instance (restart via exec)
        my $restart = $ENV{RESTART_PVE_DAEMON};
        delete $ENV{RESTART_PVE_DAEMON};

        my $lockfd = $ENV{PVE_DAEMON_LOCK_FD};
        delete $ENV{PVE_DAEMON_LOCK_FD};

        if (defined($lockfd)) {
            die "unable to parse lock fd '$lockfd'\n"
                if $lockfd !~ m/^(\d+)$/;
            $lockfd = $1; # untaint
        }

        die "please run as root\n" if !$restart && ($> != 0);

        die "can't create more that one PVE::Daemon" if $daemon_initialized;
        $daemon_initialized = 1;

        PVE::INotify::inotify_init();

        my $class = ref($this) || $this;

        $self = bless {
            name => $name,
            pidfile => "/var/run/${name}.pid",
            env_restart_pve_daemon => $restart,
            env_pve_lock_fd => $lockfd,
            workers => {},
            old_workers => {},
        }, $class;

        my %known_option = map { $_ => 1 } qw(
            restart_on_error
            stop_wait_time
            pidfile
            max_workers
            leave_children_open_on_reload
            setgid
            setuid
        );

        foreach my $opt (keys %params) {
            die "unknown daemon option '$opt'\n" if !$known_option{$opt};
            $self->{$opt} = $params{$opt};
        }

        if (my $gidstr = $self->{setgid}) {
            # test for definedness - gid 0 is a valid lookup result
            my $gid = getgrnam($gidstr);
            die "getgrnam failed - $!\n" if !defined($gid);
            POSIX::setgid($gid) || die "setgid $gid failed - $!\n";
            $EGID = "$gid $gid"; # this calls setgroups
            # just to be sure
            die "detected strange gid\n" if !($GID eq "$gid $gid" && $EGID eq "$gid $gid");
        }

        if (my $uidstr = $self->{setuid}) {
            # test for definedness - uid 0 is a valid lookup result
            my $uid = getpwnam($uidstr);
            die "getpwnam failed - $!\n" if !defined($uid);
            POSIX::setuid($uid) || die "setuid $uid failed - $!\n";
            # just to be sure
            die "detected strange uid\n" if !($UID == $uid && $EUID == $uid);
        }

        # take over the worker pids of the previous instance
        if ($restart && $self->{max_workers}) {
            if (my $wpids = $ENV{PVE_DAEMON_WORKER_PIDS}) {
                foreach my $pid (split(':', $wpids)) {
                    if ($pid =~ m/^(\d+)$/) {
                        $self->{old_workers}->{$1} = 1;
                    }
                }
            }
        }

        $self->{nodename} = PVE::INotify::nodename();

        $self->{cmdline} = [];

        foreach my $el (@$cmdline) {
            # untaint; /s makes the pattern match any string, so we never
            # pick up a stale capture from an earlier match
            my ($arg) = $el =~ m/^(.*)$/s;
            push @{$self->{cmdline}}, $arg;
        }

        $0 = $name;
    };
    if (my $err = $@) {
        $log_err->($err);
        exit(-1);
    }

    return $self;
}
486
# Log the server stop, remove the pid and lock files, and terminate the
# process with exit code $status.
sub exit_daemon {
    my ($self, $status) = @_;

    syslog("info", "server stopped");

    $server_cleanup->($self);

    exit($status);
}
496
# Restart the daemon by exec()ing the stored command line.
#
# RESTART_PVE_DAEMON is set in the environment for the new instance and,
# when workers are used, the pids of all known workers are handed over in
# PVE_DAEMON_WORKER_PIDS (colon separated).
#
# Parameters:
#   $waittime - optional number of seconds to sleep before the exec
#
# Does not return. If the exec fails, the error is logged and the process
# exits with -1.
sub restart_daemon {
    my ($self, $waittime) = @_;

    syslog('info', "server shutdown (restart)");

    $ENV{RESTART_PVE_DAEMON} = 1;

    if ($self->{max_workers}) {
        my @workers = keys %{$self->{workers}};
        push @workers, keys %{$self->{old_workers}};
        $ENV{PVE_DAEMON_WORKER_PIDS} = join(':', @workers);
    }

    sleep($waittime) if $waittime; # avoid high server load due to restarts

    PVE::INotify::inotify_close();

    my @cmd = @{$self->{cmdline}};

    # exec only returns on failure; the bare block avoids the
    # "Statement unlikely to be reached" warning
    { exec(@cmd) };
    my $err = $!;

    syslog('err', "%s", "unable to exec '" . join(' ', @cmd) . "' - $err");

    exit(-1);
}
518
# Hook for subclasses - please overwrite.
# The start code calls this once at startup, before the daemon forks
# into the background. The default implementation does nothing.
sub init {
    my ($self) = @_;
}
525
# Hook for subclasses - please overwrite.
# Called when the server terminates. The default implementation logs a
# message and, for a daemon without workers, reaps all children that
# have already exited (non-blocking).
sub shutdown {
    my ($self) = @_;

    syslog('info' , "server closing");

    return if $self->{max_workers};

    # wait for children
    while (waitpid(-1, POSIX::WNOHANG()) > 0) {
        # nothing to do - just collect them
    }
}
537
538 # please define in subclass
539 #sub hup {
540 # my ($self) = @_;
541 #
542 # syslog('info' , "received signal HUP (restart)");
543 #}
544
# Hook for subclasses - please overwrite.
# Main loop of the daemon (or of each worker). The default implementation
# just logs a message every 5 seconds, forever.
sub run {
    my ($self) = @_;

    while (1) {
        syslog('info' , "server is running");
        sleep(5);
    }
}
554
# Start the daemon. With a true $debug the server code is asked to run in
# debug mode. Any error raised during startup is logged and terminates the
# process with exit(-1).
sub start {
    my ($self, $debug) = @_;

    eval { $server_run->($self, $debug); };
    my $err = $@;

    if ($err) {
        $log_err->("start failed - $err");
        exit(-1);
    }
}
564
# Read the pid from the first line of the pid file.
# Returns the pid as integer, or 0 if the line is empty/missing or does
# not consist of digits only.
my $read_pid = sub {
    my ($self) = @_;

    my $first_line = PVE::Tools::file_read_firstline($self->{pidfile});

    if ($first_line && $first_line =~ m/^(\d+)$/) { # untaint
        return int($1);
    }

    return 0;
};
578
# Check whether the daemon recorded in the pid file is running.
# Returns ($running, $pid) in list context and $running (0 or 1) in
# scalar context; without a usable pid the result is (0, 0) / 0.
sub running {
    my ($self) = @_;

    my $pid = $read_pid->($self);

    if (!$pid) {
        return wantarray ? (0, 0) : 0;
    }

    my $alive = 0;
    $alive = 1 if PVE::ProcFSTools::check_process_running($pid);

    return wantarray ? ($alive, $pid) : $alive;
}
591
# Stop the daemon recorded in the pid file.
#
# Sends TERM and checks once per second, for up to 'stop_wait_time'
# seconds (default 5), whether the process is gone. Only if it is still
# running after that is KILL sent. Finally, if the pid file still exists,
# the daemon lock is taken and the pid/lock files are removed; a failure
# there is logged and not raised.
sub stop {
    my ($self) = @_;

    my $pid = $read_pid->($self);

    return if !$pid;

    if (PVE::ProcFSTools::check_process_running($pid)) {
        kill(15, $pid); # send TERM signal

        # give some time
        my $wait_time = $self->{stop_wait_time} || 5;
        my $running = 1;
        for (my $i = 0; $i < $wait_time; $i++) {
            $running = PVE::ProcFSTools::check_process_running($pid);
            last if !$running;
            sleep(1);
        }

        # the process may have exited during the last sleep
        $running = PVE::ProcFSTools::check_process_running($pid) if $running;

        if ($running) {
            # only KILL a process we know to be alive - the pid of an
            # exited process may already belong to somebody else
            syslog('err', "server still running - send KILL");
            kill(9, $pid);
        }

        # collect the process in case it is our own child
        waitpid($pid, 0);
    }

    if (-f $self->{pidfile}) {
        eval {
            # try to get the lock
            $lockpidfile->($self);
            $server_cleanup->($self);
        };
        if (my $err = $@) {
            $log_err->("cleanup failed - $err");
        }
    }
}
628
# Register the 'start' command (POST) on the class of $self.
# $description optionally replaces the default command description.
# The command accepts an optional boolean 'debug' parameter, which is
# passed on to start().
sub register_start_command {
    my ($self, $description) = @_;

    my $class = ref($self);

    my $debug_option = {
        description => "Debug mode - stay in foreground",
        type => "boolean",
        optional => 1,
        default => 0,
    };

    my $handler = sub {
        my ($param) = @_;

        $self->start($param->{debug});

        return undef;
    };

    $class->register_method({
        name => 'start',
        path => 'start',
        method => 'POST',
        description => $description || "Start the daemon.",
        parameters => {
            additionalProperties => 0,
            properties => {
                debug => $debug_option,
            },
        },
        returns => { type => 'null' },
        code => $handler,
    });
}
660
# Restart/reload helper shared by the 'restart' and 'reload' commands.
#
# - inside a restarted instance (env_restart_pve_daemon): just start
# - daemon not running: start it
# - daemon running and $use_hup: send HUP to the running daemon
# - daemon running otherwise: stop, then start
my $reload_daemon = sub {
    my ($self, $use_hup) = @_;

    if ($self->{env_restart_pve_daemon}) {
        $self->start();
        return;
    }

    my ($running, $pid) = $self->running();

    if (!$running) {
        $self->start();
        return;
    }

    if ($use_hup) {
        syslog('info', "send HUP to $pid");
        kill 1, $pid;
        return;
    }

    $self->stop();
    $self->start();
};
681
# Register the 'restart' command (POST) on the class of $self.
# With a true $use_hup a running daemon is restarted by sending HUP,
# otherwise by stop/start. $description optionally replaces the default
# command description.
sub register_restart_command {
    my ($self, $use_hup, $description) = @_;

    my $class = ref($self);

    my $handler = sub {
        my ($param) = @_;

        $reload_daemon->($self, $use_hup);

        return undef;
    };

    my $schema = {
        additionalProperties => 0,
        properties => {},
    };

    $class->register_method({
        name => 'restart',
        path => 'restart',
        method => 'POST',
        description => $description || "Restart the daemon (or start if not running).",
        parameters => $schema,
        returns => { type => 'null' },
        code => $handler,
    });
}
706
# Register the 'reload' command (POST) on the class of $self.
# A running daemon is reloaded by sending HUP; a stopped one is started.
# $description optionally replaces the default command description.
sub register_reload_command {
    my ($self, $description) = @_;

    my $class = ref($self);

    my $handler = sub {
        my ($param) = @_;

        $reload_daemon->($self, 1);

        return undef;
    };

    my $schema = {
        additionalProperties => 0,
        properties => {},
    };

    $class->register_method({
        name => 'reload',
        path => 'reload',
        method => 'POST',
        description => $description || "Reload daemon configuration (or start if not running).",
        parameters => $schema,
        returns => { type => 'null' },
        code => $handler,
    });
}
731
# Register the 'stop' command (POST) on the class of $self; the command
# simply calls stop(). $description optionally replaces the default
# command description.
sub register_stop_command {
    my ($self, $description) = @_;

    my $class = ref($self);

    my $handler = sub {
        my ($param) = @_;

        $self->stop();

        return undef;
    };

    my $schema = {
        additionalProperties => 0,
        properties => {},
    };

    $class->register_method({
        name => 'stop',
        path => 'stop',
        method => 'POST',
        description => $description || "Stop the daemon.",
        parameters => $schema,
        returns => { type => 'null' },
        code => $handler,
    });
}
756
# Register the 'status' command (GET) on the class of $self.
# The command returns the string 'running' or 'stopped'.
# $description optionally replaces the default command description
# (same convention as the other register_*_command subs).
sub register_status_command {
    my ($self, $description) = @_;

    my $class = ref($self);

    $class->register_method({
        name => 'status',
        path => 'status',
        method => 'GET',
        description => $description || "Get daemon status.",
        parameters => {
            additionalProperties => 0,
            properties => {},
        },
        returns => {
            type => 'string',
            enum => ['stopped', 'running'],
        },
        code => sub {
            my ($param) = @_;

            return $self->running() ? 'running' : 'stopped';
        }});
}
781
782 # some useful helper
783
# Create a listening TCP socket on $host:$port that survives a restart
# via exec.
#
# The file descriptor number is published in the environment variable
# PVE_DAEMON_SOCKET_$port. In a restarted instance the descriptor found
# there is reopened instead of creating a new socket. In both cases the
# FD_CLOEXEC flag is cleared on the returned socket.
#
# Dies if no port is given, the inherited fd cannot be parsed/opened, or
# the socket cannot be created.
sub create_reusable_socket {
    my ($self, $port, $host) = @_;

    die "no port specifed" if !$port;

    my $env_key = "PVE_DAEMON_SOCKET_$port";
    my $sockfd = $ENV{$env_key};
    my $socket;

    if (defined($sockfd) && $self->{env_restart_pve_daemon}) {

        # reuse the socket inherited from the previous instance
        if ($sockfd =~ m/^(\d+)$/) {
            $sockfd = $1; # untaint
        } else {
            die "unable to parse socket fd '$sockfd'\n";
        }

        $socket = IO::Socket::INET->new;
        $socket->fdopen($sockfd, 'w') ||
            die "cannot fdopen file descriptor '$sockfd' - $!\n";

    } else {

        my %listen_opts = (
            LocalAddr => $host,
            LocalPort => $port,
            Listen => SOMAXCONN,
            Proto => 'tcp',
            ReuseAddr => 1,
        );

        $socket = IO::Socket::INET->new(%listen_opts) ||
            die "unable to create socket - $@\n";

        # we often observe delays when using Nagle algorithm,
        # so we disable that to maximize performance
        setsockopt($socket, IPPROTO_TCP, TCP_NODELAY, 1);

        $ENV{$env_key} = $socket->fileno;
    }

    # remove FD_CLOEXEC bit to reuse on exec
    $socket->fcntl(Fcntl::F_SETFD(), 0);

    return $socket;
}
824
825
826 1;
827