deps: moving skiplock breaks qemu-server << 4.0-109
[pve-common.git] / src / PVE / Daemon.pm
1 package PVE::Daemon;
2
3 # Abstract class to implement Daemons
4 #
5 # Features:
6 # * lock and write PID file /var/run/$name.pid to make sure only
7 # one instance is running.
8 # * keep lock open during restart
9 # * correctly daemonize (redirect STDIN/STDOUT)
10 # * restart by stop/start, exec, or signal HUP
11 # * daemon restart on error (option 'restart_on_error')
12 # * handle worker processes (option 'max_workers')
13 # * allow to restart while workers are still running
14 # (option 'leave_children_open_on_reload')
15 # * run as different user using setuid/setgid
16
17 use strict;
18 use warnings;
19 use English;
20
21 use PVE::SafeSyslog;
22 use PVE::INotify;
23
24 use POSIX ":sys_wait_h";
25 use Fcntl ':flock';
26 use Socket qw(IPPROTO_TCP TCP_NODELAY SOMAXCONN);
27 use IO::Socket::IP;
28
29 use Getopt::Long;
30 use Time::HiRes qw (gettimeofday);
31
32 use base qw(PVE::CLIHandler);
33
# Use a fixed search path for every command run from this process.
$ENV{PATH} = '/sbin:/bin:/usr/sbin:/usr/bin';

# Set by setup(); a second PVE::Daemon in the same process is refused.
my $daemon_initialized = 0;

# Sockets handed out by create_reusable_socket(); restart_daemon() clears
# their close-on-exec flag before it re-executes the daemon.
my $daemon_sockets = [];
38
# Close the handle stored in $self->{daemon_lock_fh} and remove the entry.
# Does nothing when no handle is stored.
my $close_daemon_lock = sub {
    my ($self) = @_;

    if (my $lock_fh = $self->{daemon_lock_fh}) {
        close $lock_fh;
        delete $self->{daemon_lock_fh};
    }

    return;
};
47
# Report an error on STDERR and in the system log (priority 'err').
# One trailing newline of $msg is stripped before the message is emitted.
my $log_err = sub {
    my ($msg) = @_;

    chomp $msg;

    print STDERR $msg . "\n";

    # the message is passed as an argument so a '%' in it is logged verbatim
    syslog('err', "%s", $msg);
};
54
# Cleanup hook for a freshly forked child process.
#
# Worker children run this automatically. A simple daemon (max_workers == 0)
# that forks on its own has to call it from the child. It closes the child's
# copy of the daemon lock handle, closes inotify and removes the signal
# handlers for CHLD, HUP, INT, TERM and QUIT.
sub after_fork_cleanup {
    my ($self) = @_;

    $close_daemon_lock->($self);

    PVE::INotify::inotify_close();

    foreach my $signame (qw(CHLD HUP INT TERM QUIT)) {
        # restore the default handler first ...
        $SIG{$signame} = 'DEFAULT';
        # ... then drop the entry: AnyEvent signals only work if $SIG{XX}
        # is undefined (perl event loop)
        delete $SIG{$signame};
    }
}
72
# Acquire the exclusive daemon lock on "<pidfile>.lock".
#
# If $self->{env_pve_lock_fd} is set (restart), the lock handle is re-created
# from that file descriptor and locked with a single non-blocking attempt.
# Otherwise the lock file is opened in append mode and the non-blocking lock
# is retried once per second for 5 seconds, followed by one final attempt.
#
# On success the open handle is kept in $self->{daemon_lock_fh}.
# Dies if the lock file cannot be opened or the lock cannot be acquired.
my $lockpidfile = sub {
    my ($self) = @_;

    my $lkfn = $self->{pidfile} . ".lock";

    my $waittime = 0;

    if (my $fd = $self->{env_pve_lock_fd}) {
        $self->{daemon_lock_fh} = IO::Handle->new_from_fd($fd, "a");
    } else {
        $waittime = 5;
        $self->{daemon_lock_fh} = IO::File->new(">>$lkfn");
    }

    if (!$self->{daemon_lock_fh}) {
        die "can't open lock '$lkfn' - $!\n";
    }

    for (my $i = 0; $i < $waittime; $i++) {
        return if flock($self->{daemon_lock_fh}, LOCK_EX|LOCK_NB);
        sleep(1);
    }

    if (!flock($self->{daemon_lock_fh}, LOCK_EX|LOCK_NB)) {
        # remember the errno of the failed flock right away - closing the
        # handle below may overwrite $!
        my $err = $!;

        $close_daemon_lock->($self);

        my ($running, $pid) = $self->running();
        if ($running) {
            die "can't aquire lock '$lkfn' - daemon already started (pid = $pid)\n";
        } else {
            die "can't aquire lock '$lkfn' - $err\n";
        }
    }
};
111
# Write the PID of the current process, followed by a newline, to
# $self->{pidfile}, replacing any previous content.
# Dies if the file cannot be opened or written.
my $writepidfile = sub {
    my ($self) = @_;

    my $pidfile = $self->{pidfile};

    # three-argument open with a lexical handle: the file name can never be
    # parsed as part of the mode and no global bareword handle is left behind
    open(my $fh, '>', $pidfile)
        or die "can't open pid file '$pidfile' - $!\n";

    print {$fh} "$$\n"
        or die "can't write pid file '$pidfile' - $!\n";

    # buffered write errors may only show up when the handle is closed
    close($fh)
        or die "can't write pid file '$pidfile' - $!\n";
};
122
# Remove the daemon's lock file ("<pidfile>.lock") and its PID file.
# The result of unlink is not checked, so missing files are not an error.
my $server_cleanup = sub {
    my ($self) = @_;

    my $pidfile = $self->{pidfile};

    foreach my $path ("$pidfile.lock", $pidfile) {
        unlink $path;
    }
};
129
# Reap finished worker processes without blocking.
#
# Every PID recorded in $self->{workers} and $self->{old_workers} is polled
# with waitpid(..., WNOHANG). PIDs that were reaped are removed from their
# table and logged.
my $finish_workers = sub {
    my ($self) = @_;

    for my $table (qw(workers old_workers)) {
        for my $child (keys %{$self->{$table}}) {
            my $reaped = waitpid($child, WNOHANG);
            next if !defined($reaped) || $reaped != $child;

            delete $self->{$table}->{$child};
            syslog('info', "worker $child finished");
        }
    }
};
143
# Fork worker processes until $self->{max_workers} of them are registered.
#
# Returns immediately once termination was requested or when enough workers
# are already registered in $self->{workers}.
#
# Parent: every new child PID is recorded in $self->{workers}. A failed fork
# is logged and retried after one second.
# Child: sets the process title, runs after_fork_cleanup() and then
# $self->run(). The child always ends with exit(0); an error from run() is
# logged first and followed by a 5 second pause.
my $start_workers = sub {
    my ($self) = @_;

    return if $self->{terminate};

    my $count = scalar(keys %{$self->{workers}});

    my $need = $self->{max_workers} - $count;

    return if $need <= 0;

    syslog('info', "starting $need worker(s)");

    while ($need > 0) {
        my $pid = fork;

        if (!defined($pid)) {
            syslog('err', "can't fork worker");
            sleep(1);
        } elsif ($pid) { # parent
            $self->{workers}->{$pid} = 1;
            syslog('info', "worker $pid started");
            $need--;
        } else { # child
            $0 = "$self->{name} worker";

            $self->after_fork_cleanup();

            eval { $self->run(); };
            if (my $err = $@) {
                # pass the error as an argument, never as the format string:
                # a '%' inside the message must be logged verbatim
                syslog('err', "%s", $err);
                sleep(5); # avoid fast restarts
            }

            syslog('info', "worker exit");
            exit(0);
        }
    }
};
186
# Stop the server: run the shutdown() hook and terminate the workers.
#
#   $allow_open_children - when true and the daemon option
#       'leave_children_open_on_reload' is set, only the current workers get
#       a TERM signal and the function returns without waiting for them.
#
# Otherwise old workers get TERM as well and all children are awaited for up
# to 10 seconds. Workers that are still alive after the timeout are sent KILL.
my $terminate_server = sub {
    my ($self, $allow_open_children) = @_;

    $self->{terminate} = 1; # set flag to avoid worker restart

    eval { $self->shutdown(); };
    warn $@ if $@;

    # a daemon without workers is done here
    return if !$self->{max_workers};

    # we have workers - send TERM signal
    foreach my $worker (keys %{$self->{workers}}) {
        kill(15, $worker); # TERM childs
    }

    # if configured, leave children running on HUP
    return if $allow_open_children && $self->{leave_children_open_on_reload};

    # else, send TERM to old workers
    foreach my $worker (keys %{$self->{old_workers}}) {
        kill(15, $worker); # TERM childs
    }

    # nicely shutdown childs (give them max 10 seconds to shut down)
    my $previous_alarm = alarm(10);
    eval {
        local $SIG{ALRM} = sub { die "timeout\n" };

        while ((my $pid = waitpid(-1, 0)) > 0) {
            foreach my $table (qw(workers old_workers)) {
                next if !defined($self->{$table}->{$pid});

                delete($self->{$table}->{$pid});
                syslog('info', "worker $pid finished");
            }
        }
        alarm(0); # avoid race condition
    };
    my $err = $@;

    alarm($previous_alarm);

    return if !$err;

    syslog('err', "error stopping workers (will kill them now) - $err");
    foreach my $table (qw(workers old_workers)) {
        foreach my $worker (keys %{$self->{$table}}) {
            # KILL childs still alive!
            next if !kill(0, $worker);

            delete($self->{$table}->{$worker});
            syslog("err", "kill worker $worker");
            kill(9, $worker);
            # fixme: waitpid?
        }
    }
};
250
# Prepare the current process for running the daemon.
#
# Steps, in order:
#  * initialize logging with the daemon name
#  * consume RESTART_PVE_DAEMON and PVE_DAEMON_LOCK_FD from the environment
#    and store them in $self (the lock fd is validated and untainted)
#  * require root unless this is a restart
#  * refuse a second PVE::Daemon in the same process
#  * initialize inotify
#  * switch to the configured group ('setgid') and user ('setuid')
#  * on restart with workers, register the PIDs from PVE_DAEMON_WORKER_PIDS
#    as old workers
#  * store the node name in $self->{nodename}
#
# Dies with a message on any failure.
sub setup {
    my ($self) = @_;

    initlog($self->{name});

    my $restart = $ENV{RESTART_PVE_DAEMON};
    delete $ENV{RESTART_PVE_DAEMON};
    $self->{env_restart_pve_daemon} = $restart;

    my $lockfd = $ENV{PVE_DAEMON_LOCK_FD};
    delete $ENV{PVE_DAEMON_LOCK_FD};
    if (defined($lockfd)) {
        die "unable to parse lock fd '$lockfd'\n"
            if $lockfd !~ m/^(\d+)$/;
        $lockfd = $1; # untaint
    }
    $self->{env_pve_lock_fd} = $lockfd;

    die "please run as root\n" if !$restart && ($> != 0);

    die "can't create more that one PVE::Daemon" if $daemon_initialized;
    $daemon_initialized = 1;

    PVE::INotify::inotify_init();

    if (my $gidstr = $self->{setgid}) {
        # test for definedness, not truth: gid 0 is a valid lookup result
        my $gid = getgrnam($gidstr);
        die "getgrnam failed - $!\n" if !defined($gid);

        POSIX::setgid($gid) || die "setgid $gid failed - $!\n";
        $EGID = "$gid $gid"; # this calls setgroups
        # just to be sure
        die "detected strange gid\n" if !($GID eq "$gid $gid" && $EGID eq "$gid $gid");
    }

    if (my $uidstr = $self->{setuid}) {
        # test for definedness, not truth: uid 0 is a valid lookup result
        my $uid = getpwnam($uidstr);
        die "getpwnam failed - $!\n" if !defined($uid);

        POSIX::setuid($uid) || die "setuid $uid failed - $!\n";
        # just to be sure
        die "detected strange uid\n" if !($UID == $uid && $EUID == $uid);
    }

    if ($restart && $self->{max_workers}) {
        if (my $wpids = $ENV{PVE_DAEMON_WORKER_PIDS}) {
            foreach my $pid (split(':', $wpids)) {
                # only purely numeric entries are accepted (and untainted)
                if ($pid =~ m/^(\d+)$/) {
                    $self->{old_workers}->{$1} = 1;
                }
            }
        }
    }

    $self->{nodename} = PVE::INotify::nodename();
}
303
# Main entry of a starting (or restarting) daemon process.
#
#   $debug - when true, stay in the foreground and keep STDIN/STDOUT
#
# Steps, in order:
#  * take the daemon lock and export its file descriptor through
#    PVE_DAEMON_LOCK_FD with close-on-exec cleared, so it survives an exec
#  * call the init() hook
#  * unless debugging, redirect STDIN/STDOUT to /dev/null; on a fresh start
#    also fork into the background (the parent exits)
#  * write the PID file (fresh start only), start a new session and
#    redirect STDERR to STDOUT
#  * install TERM/QUIT/INT/HUP handlers
#  * with workers: keep max_workers children running until termination is
#    requested; without workers: call run()
#
# This function does not return: it ends in exit_daemon() or restart_daemon().
my $server_run = sub {
    my ($self, $debug) = @_;

    # fixme: handle restart lockfd
    &$lockpidfile($self);

    # remove FD_CLOEXEC bit to reuse on exec
    $self->{daemon_lock_fh}->fcntl(Fcntl::F_SETFD(), 0);

    $ENV{PVE_DAEMON_LOCK_FD} = $self->{daemon_lock_fh}->fileno;

    # run in background
    my $spid;

    $self->{debug} = 1 if $debug;

    $self->init();

    if (!$debug) {
        # 'or', not '||': with '||' the die is bound to the mode/file string
        # and can never be reached
        open(STDIN, '<', '/dev/null') or die "can't read /dev/null - $!\n";
        open(STDOUT, '>', '/dev/null') or die "can't write /dev/null - $!\n";
    }

    if (!$self->{env_restart_pve_daemon} && !$debug) {
        PVE::INotify::inotify_close();
        $spid = fork();
        if (!defined($spid)) {
            die "can't put server into background - fork failed";
        } elsif ($spid) { # parent
            exit(0);
        }
        PVE::INotify::inotify_init();
    }

    if ($self->{env_restart_pve_daemon}) {
        syslog('info' , "restarting server");
    } else {
        &$writepidfile($self);
        syslog('info' , "starting server");
    }

    POSIX::setsid();

    # same precedence fix as above
    open(STDERR, '>&', \*STDOUT) or die "can't redirect STDERR to STDOUT - $!\n";

    my $old_sig_term = $SIG{TERM};
    local $SIG{TERM} = sub {
        local ($@, $!, $?); # do not overwrite error vars
        syslog('info', "received signal TERM");
        &$terminate_server($self, 0);
        &$server_cleanup($self);
        &$old_sig_term(@_) if $old_sig_term;
    };

    my $old_sig_quit = $SIG{QUIT};
    local $SIG{QUIT} = sub {
        local ($@, $!, $?); # do not overwrite error vars
        syslog('info', "received signal QUIT");
        &$terminate_server($self, 0);
        &$server_cleanup($self);
        &$old_sig_quit(@_) if $old_sig_quit;
    };

    my $old_sig_int = $SIG{INT};
    local $SIG{INT} = sub {
        local ($@, $!, $?); # do not overwrite error vars
        syslog('info', "received signal INT");
        $SIG{INT} = 'DEFAULT'; # allow to terminate now
        &$terminate_server($self, 0);
        &$server_cleanup($self);
        &$old_sig_int(@_) if $old_sig_int;
    };

    $SIG{HUP} = sub {
        local ($@, $!, $?); # do not overwrite error vars
        syslog('info', "received signal HUP");
        $self->{got_hup_signal} = 1;
        if ($self->{max_workers}) {
            &$terminate_server($self, 1);
        } elsif ($self->can('hup')) {
            eval { $self->hup() };
            warn $@ if $@;
        }
    };

    eval {
        if ($self->{max_workers}) {
            my $old_sig_chld = $SIG{CHLD};
            local $SIG{CHLD} = sub {
                local ($@, $!, $?); # do not overwrite error vars
                &$finish_workers($self);
                &$old_sig_chld(@_) if $old_sig_chld;
            };

            # catch worker finished during restart phase
            &$finish_workers($self);

            # now loop forever (until we receive terminate signal)
            for (;;) {
                &$start_workers($self);
                sleep(5);
                &$finish_workers($self);
                last if $self->{terminate};
            }

        } else {
            $self->run();
        }
    };
    my $err = $@;

    if ($err) {
        syslog('err', "ERROR: $err");

        &$terminate_server($self, 1);

        if (my $wait_time = $self->{restart_on_error}) {
            $self->restart_daemon($wait_time);
        } else {
            $self->exit_daemon(-1);
        }
    }

    if ($self->{got_hup_signal}) {
        $self->restart_daemon();
    } else {
        $self->exit_daemon(0);
    }
};
433
# Constructor.
#
#   $name    - daemon name; used as process title and for the default PID
#              file /var/run/$name.pid ('daemon' if empty)
#   $cmdline - array reference with the command line; stored (untainted) in
#              $self->{cmdline}
#   %params  - daemon options: restart_on_error, stop_wait_time, pidfile,
#              max_workers, leave_children_open_on_reload, setgid, setuid
#
# Any error during construction (e.g. an unknown option) is logged and ends
# the process with exit(-1).
sub new {
    my ($this, $name, $cmdline, %params) = @_;

    $name ||= 'daemon'; # should not happen

    my %known_option = map { $_ => 1 } qw(
        restart_on_error
        stop_wait_time
        pidfile
        max_workers
        leave_children_open_on_reload
        setgid
        setuid
    );

    my $self;

    eval {
        my $class = ref($this) || $this;

        $self = bless {
            name => $name,
            pidfile => "/var/run/${name}.pid",
            workers => {},
            old_workers => {},
        }, $class;

        foreach my $opt (keys %params) {
            die "unknown daemon option '$opt'\n" if !$known_option{$opt};

            $self->{$opt} = $params{$opt};
        }

        # untaint
        $self->{cmdline} = [map { /^(.*)$/ } @$cmdline];

        $0 = $name;
    };
    if (my $err = $@) {
        $log_err->($err);
        exit(-1);
    }

    return $self;
}
486
# Log that the server stopped, remove the lock and PID files and end the
# process with exit code $status.
sub exit_daemon {
    my ($self, $status) = @_;

    syslog("info", "server stopped");

    $server_cleanup->($self);

    exit($status);
}
496
# Restart the daemon by re-executing its command line in this process.
#
#   $waittime - optional number of seconds to sleep before the exec
#
# State is handed to the new program image through the environment:
# RESTART_PVE_DAEMON marks the restart and, with workers, the PIDs of all
# current and old workers are passed in PVE_DAEMON_WORKER_PIDS. Sockets from
# create_reusable_socket() get their close-on-exec flag cleared.
#
# Does not return: if the exec fails, the failure is logged and the process
# exits with -1.
sub restart_daemon {
    my ($self, $waittime) = @_;

    syslog('info', "server shutdown (restart)");

    $ENV{RESTART_PVE_DAEMON} = 1;

    foreach my $ds (@$daemon_sockets) {
        $ds->fcntl(Fcntl::F_SETFD(), 0);
    }

    if ($self->{max_workers}) {
        my @workers = keys %{$self->{workers}};
        push @workers, keys %{$self->{old_workers}};
        $ENV{PVE_DAEMON_WORKER_PIDS} = join(':', @workers);
    }

    sleep($waittime) if $waittime; # avoid high server load due to restarts

    PVE::INotify::inotify_close();

    # exec only returns on failure; log the reason instead of vanishing
    # silently ('or' keeps perl's "unlikely to be reached" warning quiet)
    exec(@{$self->{cmdline}})
        or syslog('err', "%s", "restart failed - unable to exec: $!");

    exit(-1); # only reached if exec failed
}
522
# Startup hook - meant to be overridden in a subclass.
# It is invoked once during daemon startup, before the process forks into
# the background. The default implementation does nothing.
sub init {
    my ($self) = @_;

}
529
# Shutdown hook - meant to be overridden in a subclass.
# The default implementation logs the event and, for a daemon without
# workers, reaps all children that have already exited (non-blocking).
sub shutdown {
    my ($self) = @_;

    syslog('info' , "server closing");

    return if $self->{max_workers};

    # wait for children
    while (waitpid(-1, POSIX::WNOHANG()) > 0) {
        # nothing to do - just collect the exit status
    }
}
541
542 # please define in subclass
543 #sub hup {
544 # my ($self) = @_;
545 #
546 # syslog('info' , "received signal HUP (restart)");
547 #}
548
# Main loop - meant to be overridden in a subclass.
# The default implementation never returns: it writes a log line every
# five seconds.
sub run {
    my ($self) = @_;

    while (1) { # forever
        syslog('info' , "server is running");
        sleep(5);
    }
}
558
# Set up and run the daemon.
#
#   $debug - passed on to the server loop (debug mode stays in foreground)
#
# An error raised during setup or while running is logged with the prefix
# "start failed - " and ends the process with exit(-1).
sub start {
    my ($self, $debug) = @_;

    eval {
        $self->setup();
        $server_run->($self, $debug);
    };
    my $err = $@;

    return if !$err;

    $log_err->("start failed - $err");
    exit(-1);
}
571
# Read the daemon PID from the first line of $self->{pidfile}.
# Returns the PID as integer, or 0 when the line is empty/missing or is not
# a plain decimal number.
my $read_pid = sub {
    my ($self) = @_;

    my $first_line = PVE::Tools::file_read_firstline($self->{pidfile});

    # the capture group also untaints the value
    if ($first_line && $first_line =~ m/^(\d+)$/) {
        return int($1);
    }

    return 0;
};
585
# Returns 1 if the parent process is PID 1, otherwise 0.
# Used to check whether the process was started by systemd.
my $init_ppid = sub {
    return getppid() == 1 ? 1 : 0;
};
595
# Check whether the daemon recorded in the PID file is running.
#
# Scalar context: returns 1 or 0.
# List context:   returns (running, pid); (0, 0) if no PID could be read.
sub running {
    my ($self) = @_;

    my $pid = $read_pid->($self)
        or return wantarray ? (0, 0) : 0;

    my $res = PVE::ProcFSTools::check_process_running($pid) ? 1 : 0;

    return wantarray ? ($res, $pid) : $res;
}
608
# Stop the daemon recorded in the PID file.
#
# A running process gets TERM and is given $self->{stop_wait_time} seconds
# (default 5) to exit, polled once per second. After that KILL is sent in
# any case. If the PID file still exists afterwards, the daemon lock is
# taken and the lock and PID files are removed; a failure there is only
# logged.
sub stop {
    my ($self) = @_;

    my $pid = $read_pid->($self);

    return if !$pid;

    if (PVE::ProcFSTools::check_process_running($pid)) {
        kill(15, $pid); # send TERM signal

        # give some time
        my $remaining = $self->{stop_wait_time} || 5;
        my $running = 1;
        while ($remaining-- > 0) {
            $running = PVE::ProcFSTools::check_process_running($pid);
            last if !$running;
            sleep(1);
        }

        syslog('err', "server still running - send KILL") if $running;

        # to be sure
        kill(9, $pid);
        waitpid($pid, 0);
    }

    return if !-f $self->{pidfile};

    eval {
        # try to get the lock
        $lockpidfile->($self);
        $server_cleanup->($self);
    };
    if (my $err = $@) {
        $log_err->("cleanup failed - $err");
    }
}
645
# Register the 'start' command on the class of $self.
#
#   $description - optional command description
#
# The command starts the daemon directly when the parent process is PID 1
# or when debug mode is requested; otherwise it runs
# 'systemctl start <name>'.
sub register_start_command {
    my ($self, $description) = @_;

    my $class = ref($self);

    my $handler = sub {
        my ($param) = @_;

        my $debug = $param->{debug};

        if ($init_ppid->() || $debug) {
            $self->start($debug);
        } else {
            PVE::Tools::run_command(['systemctl', 'start', $self->{name}]);
        }

        return undef;
    };

    $class->register_method({
        name => 'start',
        path => 'start',
        method => 'POST',
        description => $description || "Start the daemon.",
        parameters => {
            additionalProperties => 0,
            properties => {
                debug => {
                    description => "Debug mode - stay in foreground",
                    type => "boolean",
                    optional => 1,
                    default => 0,
                },
            },
        },
        returns => { type => 'null' },
        code => $handler,
    });
}
681
# Reload or restart the daemon.
#
#   $use_hup - when true, a running daemon is sent HUP instead of being
#              stopped and started again
#
# If this process is itself the re-executed daemon (restart marker set), or
# if no daemon is running, the daemon is simply started.
my $reload_daemon = sub {
    my ($self, $use_hup) = @_;

    if ($self->{env_restart_pve_daemon}) {
        $self->start();
        return;
    }

    my ($running, $pid) = $self->running();

    if (!$running) {
        $self->start();
    } elsif ($use_hup) {
        syslog('info', "send HUP to $pid");
        kill 1, $pid;
    } else {
        $self->stop();
        $self->start();
    }
};
702
# Register the 'restart' command on the class of $self.
#
#   $use_hup     - when true, restart through HUP / 'reload-or-restart'
#   $description - optional command description
#
# When the parent process is PID 1 the daemon is reloaded directly;
# otherwise the request is handed to systemctl.
sub register_restart_command {
    my ($self, $use_hup, $description) = @_;

    my $class = ref($self);

    my $handler = sub {
        my ($param) = @_;

        if ($init_ppid->()) {
            $reload_daemon->($self, $use_hup);
        } else {
            my $action = $use_hup ? 'reload-or-restart' : 'restart';
            PVE::Tools::run_command(['systemctl', $action, $self->{name}]);
        }

        return undef;
    };

    $class->register_method({
        name => 'restart',
        path => 'restart',
        method => 'POST',
        description => $description || "Restart the daemon (or start if not running).",
        parameters => {
            additionalProperties => 0,
            properties => {},
        },
        returns => { type => 'null' },
        code => $handler,
    });
}
731
# Register the 'reload' command on the class of $self.
#
#   $description - optional command description
#
# The command always reloads with HUP semantics (or starts the daemon if it
# is not running).
sub register_reload_command {
    my ($self, $description) = @_;

    my $class = ref($self);

    my $handler = sub {
        my ($param) = @_;

        $reload_daemon->($self, 1);

        return undef;
    };

    $class->register_method({
        name => 'reload',
        path => 'reload',
        method => 'POST',
        description => $description || "Reload daemon configuration (or start if not running).",
        parameters => {
            additionalProperties => 0,
            properties => {},
        },
        returns => { type => 'null' },
        code => $handler,
    });
}
756
# Register the 'stop' command on the class of $self.
#
#   $description - optional command description
#
# When the parent process is PID 1 the daemon is stopped directly;
# otherwise 'systemctl stop <name>' is run.
sub register_stop_command {
    my ($self, $description) = @_;

    my $class = ref($self);

    my $handler = sub {
        my ($param) = @_;

        if ($init_ppid->()) {
            $self->stop();
        } else {
            PVE::Tools::run_command(['systemctl', 'stop', $self->{name}]);
        }

        return undef;
    };

    $class->register_method({
        name => 'stop',
        path => 'stop',
        method => 'POST',
        description => $description || "Stop the daemon.",
        parameters => {
            additionalProperties => 0,
            properties => {},
        },
        returns => { type => 'null' },
        code => $handler,
    });
}
785
# Register the 'status' command on the class of $self.
#
#   $description - optional command description; defaults to
#                  "Get daemon status."
#
# The command returns the string 'running' or 'stopped'.
sub register_status_command {
    my ($self, $description) = @_;

    my $class = ref($self);

    $class->register_method({
        name => 'status',
        path => 'status',
        method => 'GET',
        # honor the caller supplied description like the other register_*
        # methods do (it used to be silently ignored)
        description => $description || "Get daemon status.",
        parameters => {
            additionalProperties => 0,
            properties => {},
        },
        returns => {
            type => 'string',
            enum => ['stopped', 'running'],
        },
        code => sub {
            my ($param) = @_;

            # the condition puts running() in scalar context: plain 0 or 1
            return $self->running() ? 'running' : 'stopped';
        }});
}
810
811 # some useful helper
812
# Create a listening TCP socket that can be handed over across a restart.
#
#   $port   - local port (required)
#   $host   - local address
#   $family - address family
#
# On a restart, if PVE_DAEMON_SOCKET_<port> holds a file descriptor, the
# socket is re-opened from it and marked close-on-exec. Otherwise a new
# listening socket is created with TCP_NODELAY set, and its file descriptor
# is published in PVE_DAEMON_SOCKET_<port>. The socket is remembered in the
# module wide socket list and returned.
#
# Dies if no port is given, the inherited fd is malformed or cannot be
# opened, or the socket cannot be created.
sub create_reusable_socket {
    my ($self, $port, $host, $family) = @_;

    die "no port specifed" if !$port;

    my $env_key = "PVE_DAEMON_SOCKET_$port";
    my $sockfd = $ENV{$env_key};
    my $socket;

    if (defined($sockfd) && $self->{env_restart_pve_daemon}) {

        die "unable to parse socket fd '$sockfd'\n"
            if $sockfd !~ m/^(\d+)$/;
        $sockfd = $1; # untaint

        $socket = IO::Socket::IP->new;
        $socket->fdopen($sockfd, 'w') ||
            die "cannot fdopen file descriptor '$sockfd' - $!\n";

        $socket->fcntl(Fcntl::F_SETFD(), Fcntl::FD_CLOEXEC());

    } else {

        my %socket_args = (
            LocalAddr => $host,
            LocalPort => $port,
            Listen => SOMAXCONN,
            Family => $family,
            Proto => 'tcp',
            GetAddrInfoFlags => 0,
            ReuseAddr => 1,
        );

        $socket = IO::Socket::IP->new(%socket_args) ||
            die "unable to create socket - $@\n";

        # we often observe delays when using Nagle algorithm,
        # so we disable that to maximize performance
        setsockopt($socket, IPPROTO_TCP, TCP_NODELAY, 1);

        $ENV{$env_key} = $socket->fileno;
    }

    push @$daemon_sockets, $socket;

    return $socket;
}
855
856
857 1;
858