]> git.proxmox.com Git - pve-common.git/blob - data/PVE/Daemon.pm
Daemon: add helper to create sockets
[pve-common.git] / data / PVE / Daemon.pm
1 package PVE::Daemon;
2
3 # Abstract class to implement Daemons
4 #
5 # Features:
6 # * lock and write PID file /var/run/$name.pid to make sure only
7 # one instance is running.
8 # * keep lock open during restart
9 # * correctly daemonize (redirect STDIN/STDOUT)
10 # * restart by stop/start, exec, or signal HUP
11 # * daemon restart on error (option 'restart_on_error')
12 # * handle worker processes (option 'max_workers')
13 # * allow to restart while workers are still running
14 # (option 'leave_children_open_on_reload')
15
16 use strict;
17 use warnings;
18 use PVE::SafeSyslog;
19 use PVE::INotify;
20
21 use POSIX ":sys_wait_h";
22 use Fcntl ':flock';
23 use Socket qw(IPPROTO_TCP TCP_NODELAY SOMAXCONN);
24 use IO::Socket::INET;
25
26 use Getopt::Long;
27 use Time::HiRes qw (gettimeofday);
28
29 use base qw(PVE::CLIHandler);
30
# Pin PATH to the standard system binary directories.
$ENV{PATH} = join(':', qw(/sbin /bin /usr/sbin /usr/bin));

# Guard flag checked and set by new(): we only allow one instance.
my $daemon_initialized = 0;
34
# Close the daemon lock file handle kept in $self->{daemon_lock_fh} and
# remove the entry from $self. Does nothing when no handle is stored.
my $close_daemon_lock = sub {
    my ($self) = @_;

    my $lock_fh = $self->{daemon_lock_fh};
    return if !$lock_fh;

    close $lock_fh;
    delete $self->{daemon_lock_fh};
};
43
# Report an error message both on STDERR and via syslog (priority 'err').
# A trailing newline on the message is removed first, so exactly one
# newline is printed to STDERR.
my $log_err = sub {
    my ($msg) = @_;

    chomp $msg;

    print STDERR "$msg\n";
    syslog('err', "%s", $msg);
};
50
# Call this if you fork() from a child process.
# Note: this is already called for workers, so it is only required
# if you fork inside a simple daemon (max_workers == 0).
#
# Closes the daemon lock handle, closes inotify, and resets the
# CHLD/HUP/INT/TERM/QUIT signal handlers.
sub after_fork_cleanup {
    my ($self) = @_;

    $close_daemon_lock->($self);

    PVE::INotify::inotify_close();

    foreach my $signame (qw(CHLD HUP INT TERM QUIT)) {
        # restore default handler
        $SIG{$signame} = 'DEFAULT';
        # AnyEvent signals only work if $SIG{XX} is undefined
        # (perl event loop), so remove the entry as well
        delete $SIG{$signame};
    }
}
68
# Acquire the exclusive daemon lock on "<pidfile>.lock".
#
# When $self->{env_pve_lock_fd} is set, the lock handle is re-created from
# that file descriptor and a single non-blocking flock attempt is made.
# Otherwise the lock file is opened in append mode and the lock is retried
# once per second for up to 5 seconds before the final attempt.
#
# On success the handle is stored in $self->{daemon_lock_fh}.
# Dies if the lock file cannot be opened or the lock cannot be acquired.
my $lockpidfile = sub {
    my ($self) = @_;

    my $lkfn = $self->{pidfile} . ".lock";

    my $waittime = 0;

    if (my $fd = $self->{env_pve_lock_fd}) {
        $self->{daemon_lock_fh} = IO::Handle->new_from_fd($fd, "a");
    } else {
        $waittime = 5;
        # pass the mode separately so the file name is never parsed for
        # mode characters or stripped of whitespace
        $self->{daemon_lock_fh} = IO::File->new($lkfn, '>>');
    }

    if (!$self->{daemon_lock_fh}) {
        die "can't open lock '$lkfn' - $!\n";
    }

    for (my $i = 0; $i < $waittime; $i++) {
        return if flock($self->{daemon_lock_fh}, LOCK_EX|LOCK_NB);
        sleep(1);
    }

    if (!flock($self->{daemon_lock_fh}, LOCK_EX|LOCK_NB)) {
        # save the flock error first - closing the handle may overwrite $!
        my $err = $!;
        &$close_daemon_lock($self);

        my ($running, $pid) = $self->running();
        if ($running) {
            die "can't aquire lock '$lkfn' - daemon already started (pid = $pid)\n";
        } else {
            die "can't aquire lock '$lkfn' - $err\n";
        }
    }
};
107
# Write the current process id (followed by a newline) to $self->{pidfile},
# replacing any previous content.
# Dies if the file cannot be opened or the write cannot be completed.
my $writepidfile = sub {
    my ($self) = @_;

    my $pidfile = $self->{pidfile};

    # 3-arg open with a lexical handle: the file name is used verbatim and
    # no global bareword handle is left behind
    open(my $pid_fh, '>', $pidfile)
        or die "can't open pid file '$pidfile' - $!\n";

    print {$pid_fh} "$$\n";

    # buffered write errors only show up on close
    close($pid_fh)
        or die "can't write pid file '$pidfile' - $!\n";
};
118
# Remove the daemon's lock file ("<pidfile>.lock") and its pid file.
# Failures of unlink are ignored.
my $server_cleanup = sub {
    my ($self) = @_;

    my $pidfile = $self->{pidfile};

    unlink "$pidfile.lock";
    unlink $pidfile;
};
125
# Reap finished worker processes without blocking.
# Every pid recorded in $self->{workers} and $self->{old_workers} is polled
# with waitpid(WNOHANG); pids that were reaped are removed from their table
# and logged.
my $finish_workers = sub {
    my ($self) = @_;

    for my $group (qw(workers old_workers)) {
        for my $cpid (keys %{$self->{$group}}) {
            my $reaped = waitpid($cpid, WNOHANG);
            next if !defined($reaped) || $reaped != $cpid;

            delete($self->{$group}->{$cpid});
            syslog('info', "worker $cpid finished");
        }
    }
};
139
# Fork worker processes until $self->{max_workers} workers are recorded in
# $self->{workers}. Does nothing once $self->{terminate} is set.
#
# Parent: records each new pid in $self->{workers}. A failed fork is logged
# and retried after one second.
# Child: sets the process name, runs after_fork_cleanup(), then calls
# $self->run(). Errors from run() are logged and followed by a 5 second
# delay; the child always exits with status 0.
my $start_workers = sub {
    my ($self) = @_;

    return if $self->{terminate};

    my $count = scalar(keys %{$self->{workers}});

    my $need = $self->{max_workers} - $count;

    return if $need <= 0;

    syslog('info', "starting $need worker(s)");

    while ($need > 0) {
        my $pid = fork;

        if (!defined($pid)) {
            syslog('err', "can't fork worker");
            sleep(1);
        } elsif ($pid) { # parent
            $self->{workers}->{$pid} = 1;
            syslog('info', "worker $pid started");
            $need--;
        } else { # child
            $0 = "$self->{name} worker";

            $self->after_fork_cleanup();

            eval { $self->run(); };
            if (my $err = $@) {
                # pass the text as an argument, never as the format string:
                # an error message may contain '%' characters
                syslog('err', "%s", $err);
                sleep(5); # avoid fast restarts
            }

            syslog('info', "worker exit");
            exit(0);
        }
    }
};
182
# Stop the server: mark it as terminating, call $self->shutdown() and, when
# workers are in use, stop the worker processes.
#
# Current workers always get a TERM signal. Old workers are left running
# when this was triggered by HUP and 'leave_children_open_on_reload' is
# set. Otherwise they get TERM too, and all children get up to 10 seconds
# to exit before the remaining ones are sent KILL.
my $terminate_server = sub {
    my ($self) = @_;

    $self->{terminate} = 1; # set flag to avoid worker restart

    eval { $self->shutdown(); };
    warn $@ if $@;

    # simple daemon without workers - nothing else to stop
    return if !$self->{max_workers};

    # we have workers - send TERM signal
    foreach my $worker_pid (keys %{$self->{workers}}) {
        kill(15, $worker_pid); # TERM childs
    }

    # if configured, leave children running on HUP
    return if $self->{got_hup_signal} &&
        $self->{leave_children_open_on_reload};

    # else, send TERM to old workers
    foreach my $old_pid (keys %{$self->{old_workers}}) {
        kill(15, $old_pid); # TERM childs
    }

    # nicely shutdown childs (give them max 10 seconds to shut down)
    my $previous_alarm = alarm(10);
    eval {
        local $SIG{ALRM} = sub { die "timeout\n" };

        while ((my $reaped = waitpid(-1, 0)) > 0) {
            foreach my $group (qw(workers old_workers)) {
                next if !defined($self->{$group}->{$reaped});

                delete($self->{$group}->{$reaped});
                syslog('info', "worker $reaped finished");
            }
        }
        alarm(0); # avoid race condition
    };
    my $err = $@;

    alarm($previous_alarm);

    return if !$err;

    syslog('err', "error stopping workers (will kill them now) - $err");
    foreach my $group (qw(workers old_workers)) {
        foreach my $cpid (keys %{$self->{$group}}) {
            # KILL childs still alive!
            next if !kill(0, $cpid);

            delete($self->{$group}->{$cpid});
            syslog("err", "kill worker $cpid");
            kill(9, $cpid);
            # fixme: waitpid?
        }
    }
};
246
# Main entry point of the running daemon.
#
# Takes the daemon lock and exports its file descriptor via
# $ENV{PVE_DAEMON_LOCK_FD} (with FD_CLOEXEC cleared, so it survives exec).
# Then calls init(), detaches from the terminal unless $debug is set or the
# process is a re-exec'ed restart, writes the pid file on a fresh start,
# installs the TERM/QUIT/INT/HUP handlers and finally either supervises
# worker processes (max_workers set) or calls run() directly.
#
# The sub does not return normally: it ends in restart_daemon() or
# exit_daemon(). Errors raised before the main loop propagate to the caller.
my $server_run = sub {
    my ($self, $debug) = @_;

    # fixme: handle restart lockfd
    &$lockpidfile($self);

    # remove FD_CLOEXEC bit to reuse on exec
    $self->{daemon_lock_fh}->fcntl(Fcntl::F_SETFD(), 0);

    $ENV{PVE_DAEMON_LOCK_FD} = $self->{daemon_lock_fh}->fileno;

    # run in background
    my $spid;

    $self->{debug} = 1 if $debug;

    $self->init();

    if (!$debug) {
        # note: 'or' (not '||') so that the die applies to open() itself
        open(STDIN, '<', '/dev/null') or die "can't read /dev/null";
        open(STDOUT, '>', '/dev/null') or die "can't write /dev/null";
    }

    if (!$self->{env_restart_pve_daemon} && !$debug) {
        PVE::INotify::inotify_close();
        $spid = fork();
        if (!defined($spid)) {
            die "can't put server into background - fork failed";
        } elsif ($spid) { # parent
            exit(0);
        }
        PVE::INotify::inotify_init();
    }

    if ($self->{env_restart_pve_daemon}) {
        syslog('info' , "restarting server");
    } else {
        &$writepidfile($self);
        syslog('info' , "starting server");
    }

    POSIX::setsid();

    open(STDERR, '>&STDOUT') or die "can't redirect STDERR - $!\n";

    # Previously installed handlers are chained, but only when they are
    # code references: string values like 'DEFAULT' or 'IGNORE' cannot be
    # called.
    my $old_sig_term = $SIG{TERM};
    local $SIG{TERM} = sub {
        local ($@, $!, $?); # do not overwrite error vars
        syslog('info', "received signal TERM");
        &$terminate_server($self);
        &$server_cleanup($self);
        &$old_sig_term(@_) if ref($old_sig_term) eq 'CODE';
    };

    my $old_sig_quit = $SIG{QUIT};
    local $SIG{QUIT} = sub {
        local ($@, $!, $?); # do not overwrite error vars
        syslog('info', "received signal QUIT");
        &$terminate_server($self);
        &$server_cleanup($self);
        &$old_sig_quit(@_) if ref($old_sig_quit) eq 'CODE';
    };

    my $old_sig_int = $SIG{INT};
    local $SIG{INT} = sub {
        local ($@, $!, $?); # do not overwrite error vars
        syslog('info', "received signal INT");
        $SIG{INT} = 'DEFAULT'; # allow to terminate now
        &$terminate_server($self);
        &$server_cleanup($self);
        &$old_sig_int(@_) if ref($old_sig_int) eq 'CODE';
    };

    $SIG{HUP} = sub {
        local ($@, $!, $?); # do not overwrite error vars
        syslog('info', "received signal HUP");
        $self->{got_hup_signal} = 1;
        if ($self->{max_workers}) {
            &$terminate_server($self);
        } elsif ($self->can('hup')) {
            eval { $self->hup() };
            warn $@ if $@;
        }
    };

    eval {
        if ($self->{max_workers}) {
            my $old_sig_chld = $SIG{CHLD};
            local $SIG{CHLD} = sub {
                local ($@, $!, $?); # do not overwrite error vars
                &$finish_workers($self);
                &$old_sig_chld(@_) if ref($old_sig_chld) eq 'CODE';
            };

            # catch worker finished during restart phase
            &$finish_workers($self);

            # now loop forever (until we receive terminate signal)
            for (;;) {
                &$start_workers($self);
                sleep(5);
                &$finish_workers($self);
                last if $self->{terminate};
            }

        } else {
            $self->run();
        }
    };
    my $err = $@;

    if ($err) {
        # error text goes in as an argument - it may contain '%'
        syslog('err', "%s", "ERROR: $err");

        &$terminate_server($self);

        if (my $wait_time = $self->{restart_on_error}) {
            $self->restart_daemon($wait_time);
        } else {
            $self->exit_daemon(-1);
        }
    }

    if ($self->{got_hup_signal}) {
        $self->restart_daemon();
    } else {
        $self->exit_daemon(0);
    }
};
376
# Constructor: PVE::Daemon->new($name, $cmdline, %params)
#
#   $name    - daemon name; used for syslog, the process name and the pid
#              file ("<run_dir>/<name>.pid"). Falls back to 'daemon'.
#   $cmdline - array ref with the command line used to re-exec the daemon
#   %params  - options: restart_on_error, stop_wait_time, run_dir,
#              max_workers, leave_children_open_on_reload
#
# Reads (and removes) RESTART_PVE_DAEMON and PVE_DAEMON_LOCK_FD from the
# environment. On a restart with workers, it also picks up the worker pids
# listed in PVE_DAEMON_WORKER_PIDS. Only one instance may be created per
# process. Any error is logged and terminates the process with exit(-1).
sub new {
    my ($this, $name, $cmdline, %params) = @_;

    $name = 'daemon' if !$name; # should not happen

    initlog($name);

    my $self;

    eval {
        my $restart = $ENV{RESTART_PVE_DAEMON};
        delete $ENV{RESTART_PVE_DAEMON};

        my $lockfd = $ENV{PVE_DAEMON_LOCK_FD};
        delete $ENV{PVE_DAEMON_LOCK_FD};

        if (defined($lockfd)) {
            die "unable to parse lock fd '$lockfd'\n"
                if $lockfd !~ m/^(\d+)$/;
            $lockfd = $1; # untaint
        }

        die "please run as root\n" if !$restart && ($> != 0);

        die "can't create more that one PVE::Daemon" if $daemon_initialized;
        $daemon_initialized = 1;

        PVE::INotify::inotify_init();

        my $class = ref($this) || $this;

        $self = bless {
            name => $name,
            run_dir => '/var/run',
            env_restart_pve_daemon => $restart,
            env_pve_lock_fd => $lockfd,
            workers => {},
            old_workers => {},
        }, $class;

        # copy the recognized options, reject everything else
        my %known_option = map { $_ => 1 } qw(
            restart_on_error
            stop_wait_time
            run_dir
            max_workers
            leave_children_open_on_reload
        );

        foreach my $opt (keys %params) {
            die "unknown daemon option '$opt'\n" if !$known_option{$opt};
            $self->{$opt} = $params{$opt};
        }

        # after a restart, remember the workers of the previous instance
        if ($restart && $self->{max_workers}) {
            if (my $wpids = $ENV{PVE_DAEMON_WORKER_PIDS}) {
                foreach my $pid (split(':', $wpids)) {
                    next if $pid !~ m/^(\d+)$/;
                    $self->{old_workers}->{$1} = 1;
                }
            }
        }

        $self->{pidfile} = "$self->{run_dir}/${name}.pid";

        $self->{nodename} = PVE::INotify::nodename();

        $self->{cmdline} = [];

        foreach my $el (@$cmdline) {
            $el =~ m/^(.*)$/; # untaint
            push @{$self->{cmdline}}, $1;
        }

        $0 = $name;
    };
    if (my $err = $@) {
        &$log_err($err);
        exit(-1);
    }

    return $self;
}
465
# Log "server stopped", remove the lock and pid files and terminate the
# process with exit code $status.
sub exit_daemon {
    my ($self, $status) = @_;

    syslog("info", "server stopped");

    $server_cleanup->($self);

    exit($status);
}
475
# Restart the daemon by re-executing its original command line.
#
#   $waittime - optional number of seconds to sleep before the exec
#
# Sets RESTART_PVE_DAEMON=1 in the environment. With workers enabled, the
# pids of all current and old workers are handed over as a ':' separated
# list in PVE_DAEMON_WORKER_PIDS. Does not return: if the exec fails, the
# error is logged and the process exits with -1.
sub restart_daemon {
    my ($self, $waittime) = @_;

    syslog('info', "server shutdown (restart)");

    $ENV{RESTART_PVE_DAEMON} = 1;

    if ($self->{max_workers}) {
        my @workers = keys %{$self->{workers}};
        push @workers, keys %{$self->{old_workers}};
        $ENV{PVE_DAEMON_WORKER_PIDS} = join(':', @workers);
    }

    sleep($waittime) if $waittime; # avoid high server load due to restarts

    PVE::INotify::inotify_close();

    my @cmd = @{$self->{cmdline}};

    # exec only returns on failure - do not fail silently
    exec(@cmd)
        or syslog('err', "%s", "unable to exec '" . join(' ', @cmd) . "' - $!");

    exit(-1);
}
497
# Hook for subclasses - please overwrite.
# This is called at startup, before forking. The default does nothing.
sub init {
    my ($self) = @_;
}
504
# Hook for subclasses - please overwrite.
# The default logs "server closing". For a daemon without workers
# (max_workers not set) it also reaps all child processes that have
# already exited, without blocking.
sub shutdown {
    my ($self) = @_;

    syslog('info' , "server closing");

    return if $self->{max_workers};

    # wait for children
    while (waitpid(-1, POSIX::WNOHANG()) > 0) {
        # keep reaping
    }
}
516
517 # please define in subclass
518 #sub hup {
519 # my ($self) = @_;
520 #
521 # syslog('info' , "received signal HUP (restart)");
522 #}
523
# Main loop of the daemon (or of each worker) - please overwrite in
# subclass. The default never returns and only logs a message every
# 5 seconds.
sub run {
    my ($self) = @_;

    while (1) { # forever
        syslog('info' , "server is running");
        sleep(5);
    }
}
533
# Start the daemon.
#
#   $debug - when true, the daemon stays in the foreground
#
# Any error raised while starting is logged with the prefix
# "start failed - " and terminates the process with exit(-1).
sub start {
    my ($self, $debug) = @_;

    eval { $server_run->($self, $debug); };
    my $err = $@;
    return if !$err;

    $log_err->("start failed - $err");
    exit(-1);
}
543
# Read the daemon pid from the first line of $self->{pidfile}.
# Returns the pid as an integer, or 0 when the line is empty/missing or
# does not consist of digits only.
my $read_pid = sub {
    my ($self) = @_;

    my $pid_str = PVE::Tools::file_read_firstline($self->{pidfile});

    # the capture also untaints the value
    if ($pid_str && $pid_str =~ m/^(\d+)$/) {
        return int($1);
    }

    return 0;
};
557
# Check whether the daemon recorded in the pid file is running.
# Returns 1 or 0 in scalar context, and ($running, $pid) in list context;
# both values are 0 when no pid could be read.
sub running {
    my ($self) = @_;

    my ($res, $pid) = (0, 0);

    if (my $found = $read_pid->($self)) {
        $pid = $found;
        $res = PVE::ProcFSTools::check_process_running($pid) ? 1 : 0;
    }

    return wantarray ? ($res, $pid) : $res;
}
570
# Stop the daemon recorded in the pid file.
#
# Sends TERM and polls once per second, for up to
# $self->{stop_wait_time} seconds (default 5), until the process is gone.
# A KILL signal is sent afterwards in any case. If the pid file still
# exists, the daemon lock is taken and the lock and pid files are removed;
# errors during that cleanup are only logged.
sub stop {
    my ($self) = @_;

    my $pid = $read_pid->($self);

    return if !$pid;

    if (PVE::ProcFSTools::check_process_running($pid)) {
        kill(15, $pid); # send TERM signal

        # give some time
        my $wait_time = $self->{stop_wait_time} || 5;
        my $running = 1;
        my $elapsed = 0;
        while ($elapsed < $wait_time) {
            $running = PVE::ProcFSTools::check_process_running($pid);
            last if !$running;
            sleep(1);
            $elapsed++;
        }

        syslog('err', "server still running - send KILL") if $running;

        # to be sure
        kill(9, $pid);
        waitpid($pid, 0);
    }

    return if !-f $self->{pidfile};

    eval {
        # try to get the lock
        $lockpidfile->($self);
        $server_cleanup->($self);
    };
    if (my $err = $@) {
        $log_err->("cleanup failed - $err");
    }
}
607
# Register the 'start' command (POST /start) on the class of $self.
#
#   $description - optional description text, defaults to "Start the daemon."
#
# The command accepts an optional boolean 'debug' parameter, which is
# passed on to $self->start().
sub register_start_command {
    my ($self, $description) = @_;

    my $class = ref($self);

    my $debug_property = {
        description => "Debug mode - stay in foreground",
        type => "boolean",
        optional => 1,
        default => 0,
    };

    my $method_info = {
        name => 'start',
        path => 'start',
        method => 'POST',
        description => $description || "Start the daemon.",
        parameters => {
            additionalProperties => 0,
            properties => {
                debug => $debug_property,
            },
        },
        returns => { type => 'null' },
        code => sub {
            my ($param) = @_;

            $self->start($param->{debug});

            return undef;
        },
    };

    $class->register_method($method_info);
}
639
# Restart or reload the daemon.
#
#   $use_hup - when true, a running daemon is sent a HUP signal instead of
#              being stopped and started again
#
# When this process is itself a re-exec'ed restart
# ($self->{env_restart_pve_daemon}) or no daemon is running, the daemon is
# simply started.
my $reload_daemon = sub {
    my ($self, $use_hup) = @_;

    if ($self->{env_restart_pve_daemon}) {
        $self->start();
        return;
    }

    my ($running, $pid) = $self->running();

    if ($running && $use_hup) {
        syslog('info', "send HUP to $pid");
        kill 1, $pid;
        return;
    }

    $self->stop() if $running;
    $self->start();
};
660
# Register the 'restart' command (POST /restart) on the class of $self.
#
#   $use_hup     - passed on to the reload helper: restart a running
#                  daemon via HUP signal instead of stop/start
#   $description - optional description text
sub register_restart_command {
    my ($self, $use_hup, $description) = @_;

    my $class = ref($self);

    my $method_info = {
        name => 'restart',
        path => 'restart',
        method => 'POST',
        description => $description || "Restart the daemon (or start if not running).",
        parameters => {
            additionalProperties => 0,
            properties => {},
        },
        returns => { type => 'null' },
        code => sub {
            my ($param) = @_;

            $reload_daemon->($self, $use_hup);

            return undef;
        },
    };

    $class->register_method($method_info);
}
685
# Register the 'reload' command (POST /reload) on the class of $self.
# A running daemon is always reloaded via HUP signal.
#
#   $description - optional description text
sub register_reload_command {
    my ($self, $description) = @_;

    my $class = ref($self);

    my $method_info = {
        name => 'reload',
        path => 'reload',
        method => 'POST',
        description => $description || "Reload daemon configuration (or start if not running).",
        parameters => {
            additionalProperties => 0,
            properties => {},
        },
        returns => { type => 'null' },
        code => sub {
            my ($param) = @_;

            $reload_daemon->($self, 1);

            return undef;
        },
    };

    $class->register_method($method_info);
}
710
# Register the 'stop' command (POST /stop) on the class of $self.
# The command calls $self->stop().
#
#   $description - optional description text, defaults to "Stop the daemon."
sub register_stop_command {
    my ($self, $description) = @_;

    my $class = ref($self);

    my $method_info = {
        name => 'stop',
        path => 'stop',
        method => 'POST',
        description => $description || "Stop the daemon.",
        parameters => {
            additionalProperties => 0,
            properties => {},
        },
        returns => { type => 'null' },
        code => sub {
            my ($param) = @_;

            $self->stop();

            return undef;
        },
    };

    $class->register_method($method_info);
}
735
# Register the 'status' command (GET /status) on the class of $self.
# The command returns 'running' or 'stopped', depending on
# $self->running().
#
#   $description - optional description text, defaults to
#                  "Get daemon status."
sub register_status_command {
    my ($self, $description) = @_;

    my $class = ref($self);

    $class->register_method({
        name => 'status',
        path => 'status',
        method => 'GET',
        # honor the caller supplied text, like the other register_* methods
        description => $description || "Get daemon status.",
        parameters => {
            additionalProperties => 0,
            properties => {},
        },
        returns => {
            type => 'string',
            enum => ['stopped', 'running'],
        },
        code => sub {
            my ($param) = @_;

            return $self->running() ? 'running' : 'stopped';
        }});
}
760
761 # some useful helper
762
# Create a listening TCP socket that can be handed over to a re-exec'ed
# daemon.
#
#   $port - local port (required)
#   $host - local address passed to IO::Socket::INET as LocalAddr
#
# On a restart ($self->{env_restart_pve_daemon} set) with
# PVE_DAEMON_SOCKET_<port> present in the environment, the socket is
# re-opened from that file descriptor. Otherwise a new listening socket is
# created (ReuseAddr, TCP_NODELAY) and its descriptor is stored in that
# environment variable. In both cases FD_CLOEXEC is cleared on the socket.
#
# Returns the socket object; dies on error.
sub create_reusable_socket {
    my ($self, $port, $host) = @_;

    die "no port specifed" if !$port;

    my $env_name = "PVE_DAEMON_SOCKET_$port";
    my $sockfd = $ENV{$env_name};
    my $socket;

    if (defined($sockfd) && $self->{env_restart_pve_daemon}) {

        die "unable to parse socket fd '$sockfd'\n"
            if $sockfd !~ m/^(\d+)$/;
        $sockfd = $1; # untaint

        $socket = IO::Socket::INET->new;
        $socket->fdopen($sockfd, 'w') ||
            die "cannot fdopen file descriptor '$sockfd' - $!\n";

    } else {

        my %listen_opts = (
            LocalAddr => $host,
            LocalPort => $port,
            Listen => SOMAXCONN,
            Proto => 'tcp',
            ReuseAddr => 1,
        );

        $socket = IO::Socket::INET->new(%listen_opts) ||
            die "unable to create socket - $@\n";

        # we often observe delays when using Nagle algorithm,
        # so we disable that to maximize performance
        setsockopt($socket, IPPROTO_TCP, TCP_NODELAY, 1);

        $ENV{$env_name} = $socket->fileno;
    }

    # remove FD_CLOEXEC bit to reuse on exec
    $socket->fcntl(Fcntl::F_SETFD(), 0);

    return $socket;
}
803
804
805 1;
806