]> git.proxmox.com Git - pve-common.git/blob - data/PVE/Daemon.pm
7b3992051f82407418413a09ba9820755b270ca7
[pve-common.git] / data / PVE / Daemon.pm
1 package PVE::Daemon;
2
3 # Abstract class to implement Daemons
4 #
5 # Features:
6 # * lock and write PID file /var/run/$name.pid to make sure only
7 # one instance is running.
8 # * keep lock open during restart
9 # * correctly daemonize (redirect STDIN/STDOUT)
10 # * restart by stop/start, exec, or signal HUP
11 # * daemon restart on error (option 'restart_on_error')
12 # * handle worker processes (option 'max_workers')
13 # * allow to restart while workers are still running
14 # (option 'leave_children_open_on_reload')
15
16 use strict;
17 use warnings;
18 use PVE::SafeSyslog;
19 use PVE::INotify;
20
21 use POSIX ":sys_wait_h";
22 use Fcntl ':flock';
23 use Getopt::Long;
24 use Time::HiRes qw (gettimeofday);
25
26 use base qw(PVE::CLIHandler);
27
# Use a fixed search path for anything this process executes.
$ENV{PATH} = join(':', qw(/sbin /bin /usr/sbin /usr/bin));

# Guard flag checked and set by new(): only one PVE::Daemon instance is
# allowed per process.
my $daemon_initialized = 0;
31
# Close the daemon lock file handle stored in $self->{daemon_lock_fh} and
# drop it from the object. Does nothing when no handle is stored.
my $close_daemon_lock = sub {
    my ($self) = @_;

    my $lock_fh = $self->{daemon_lock_fh};
    return unless $lock_fh;

    close $lock_fh;
    delete $self->{daemon_lock_fh};
};
40
# Report an error message on STDERR and in the syslog (priority 'err').
# A trailing newline on $text is removed first, so the message is emitted
# with exactly one newline on STDERR.
my $log_err = sub {
    my ($text) = @_;

    chomp $text;

    print STDERR $text . "\n";
    syslog('err', '%s', $text);
};
47
# Cleanup to run in a freshly forked child process.
#
# Workers started by this module already call it, so you only need to call
# it yourself when you fork() inside a simple daemon (max_workers == 0).
#
# It releases the child's copy of the daemon lock handle, closes inotify and
# removes the daemon's signal handlers.
sub after_fork_cleanup {
    my ($self) = @_;

    $close_daemon_lock->($self);

    PVE::INotify::inotify_close();

    foreach my $signame ('CHLD', 'HUP', 'INT', 'TERM', 'QUIT') {
        # first restore the default handler ...
        $SIG{$signame} = 'DEFAULT';
        # ... then remove the entry completely: AnyEvent signal handling
        # (perl event loop) only works if $SIG{XX} is undefined
        delete $SIG{$signame};
    }
}
65
# Acquire the exclusive daemon lock on "<pidfile>.lock" and keep the handle
# in $self->{daemon_lock_fh}.
#
# When a lock file descriptor was inherited ($self->{env_pve_lock_fd}, set
# by new() from the environment) that descriptor is reused and the lock is
# tried only once. Otherwise the lock file is opened in append mode and the
# lock is retried once per second for up to 5 seconds.
#
# Dies if the lock file cannot be opened or the lock cannot be acquired.
my $lockpidfile = sub {
    my ($self) = @_;

    my $lkfn = $self->{pidfile} . ".lock";

    my $waittime = 0;

    if (my $fd = $self->{env_pve_lock_fd}) {
        $self->{daemon_lock_fh} = IO::Handle->new_from_fd($fd, "a");
    } else {
        $waittime = 5;
        # explicit mode argument, so that the file name is never parsed
        # for mode characters
        $self->{daemon_lock_fh} = IO::File->new($lkfn, "a");
    }

    if (!$self->{daemon_lock_fh}) {
        die "can't open lock '$lkfn' - $!\n";
    }

    for (my $i = 0; $i < $waittime; $i++) {
        return if flock($self->{daemon_lock_fh}, LOCK_EX|LOCK_NB);
        sleep(1);
    }

    if (!flock($self->{daemon_lock_fh}, LOCK_EX|LOCK_NB)) {
        # remember the flock error before close() gets a chance to
        # overwrite $!
        my $err = $!;

        $close_daemon_lock->($self);

        my ($running, $pid) = $self->running();
        if ($running) {
            die "can't aquire lock '$lkfn' - daemon already started (pid = $pid)\n";
        } else {
            die "can't aquire lock '$lkfn' - $err\n";
        }
    }
};
104
# Write the PID of the current process (followed by a newline) to
# $self->{pidfile}, replacing any previous content.
#
# Dies if the file cannot be opened, or if the data cannot be flushed to it
# when the handle is closed.
my $writepidfile = sub {
    my ($self) = @_;

    my $pidfile = $self->{pidfile};

    # lexical handle and 3-argument open: no global PIDFH, and the file
    # name is never interpreted as part of the open mode
    open(my $pid_fh, '>', $pidfile)
        or die "can't open pid file '$pidfile' - $!\n";

    print {$pid_fh} "$$\n";

    # buffered write errors (e.g. disk full) only show up at close time
    close($pid_fh)
        or die "can't write pid file '$pidfile' - $!\n";
};
115
# Remove the files the daemon leaves in its run directory: the lock file
# ("<pidfile>.lock") first, then the pid file itself. Errors from unlink
# are ignored.
my $server_cleanup = sub {
    my ($self) = @_;

    my $lock_path = $self->{pidfile} . ".lock";

    unlink $lock_path;
    unlink $self->{pidfile};
};
122
# Reap worker processes that have exited, without blocking.
#
# Looks at every PID recorded in $self->{workers} and $self->{old_workers};
# each one that waitpid() reports as finished is removed from its table and
# logged.
my $finish_workers = sub {
    my ($self) = @_;

    for my $group ('workers', 'old_workers') {
        for my $cpid (keys %{$self->{$group}}) {
            my $reaped = waitpid($cpid, WNOHANG);

            # anything but the PID itself means "not finished (yet)"
            next unless defined($reaped) && ($reaped == $cpid);

            delete($self->{$group}->{$cpid});
            syslog('info', "worker $cpid finished");
        }
    }
};
136
# Fork worker processes until $self->{max_workers} of them are recorded in
# $self->{workers}.
#
# Does nothing once termination was requested ($self->{terminate}) or when
# enough workers exist already. A failed fork() is logged and retried after
# one second.
#
# Each child sets its process title, calls after_fork_cleanup() and then
# run(); when run() returns or dies the child logs that and exits with
# status 0.
my $start_workers = sub {
    my ($self) = @_;

    return if $self->{terminate};

    my $count = scalar(keys %{$self->{workers}});

    my $need = $self->{max_workers} - $count;

    return if $need <= 0;

    syslog('info', "starting $need worker(s)");

    while ($need > 0) {
        my $pid = fork;

        if (!defined($pid)) {
            syslog('err', "can't fork worker");
            sleep(1);
        } elsif ($pid) { # parent
            $self->{workers}->{$pid} = 1;
            syslog('info', "worker $pid started");
            $need--;
        } else { # child
            $0 = "$self->{name} worker";

            $self->after_fork_cleanup();

            eval { $self->run(); };
            if (my $err = $@) {
                # never use the error text as format string - it may
                # contain '%' sequences
                syslog('err', "%s", $err);
                sleep(5); # avoid fast restarts
            }

            syslog('info', "worker exit");
            exit(0);
        }
    }
};
179
# Stop the server: call shutdown() and, when workers are in use, terminate
# them.
#
# Steps:
#  * set $self->{terminate}, so that no new workers get started
#  * call $self->shutdown() (errors are only reported with warn)
#  * without workers (max_workers not set) we are done
#  * send TERM to all current workers
#  * on a HUP triggered reload with 'leave_children_open_on_reload' set,
#    return without waiting for them
#  * otherwise wait up to 10 seconds for the children to exit, and send
#    KILL to recorded workers that are still alive after that
my $terminate_server = sub {
    my ($self) = @_;

    $self->{terminate} = 1; # set flag to avoid worker restart

    my $has_workers = $self->{max_workers};

    eval { $self->shutdown(); };
    warn $@ if $@;

    return if !$has_workers;

    # we have workers - terminate them

    foreach my $cpid (keys %{$self->{workers}}) {
        kill('TERM', $cpid);
    }

    return if $self->{got_hup_signal} &&
        $self->{leave_children_open_on_reload};

    # nicely shutdown childs (give them max 10 seconds to shut down)
    my $previous_alarm = alarm(10);
    eval {
        local $SIG{ALRM} = sub { die "timeout\n" };

        while ((my $pid = waitpid(-1, 0)) > 0) {
            if (defined($self->{workers}->{$pid})) {
                delete($self->{workers}->{$pid});
                syslog('info', "worker $pid finished");
            }
        }
        alarm(0); # avoid race condition
    };
    my $err = $@;

    # re-arm whatever alarm was pending before we got called
    alarm($previous_alarm);

    if ($err) {
        # pass the text as argument, not as format string
        syslog('err', "%s", "error stopping workers (will kill them now) - $err");
        foreach my $cpid (keys %{$self->{workers}}) {
            # KILL childs still alive!
            if (kill(0, $cpid)) {
                delete($self->{workers}->{$cpid});
                syslog("err", "kill worker $cpid");
                kill('KILL', $cpid);
                # fixme: waitpid?
            }
        }
    }
};
233
# Main daemon entry point, called by start().
#
# Parameters:
#   $self  - the daemon object
#   $debug - when true, stay in the foreground and keep STDIN/STDOUT
#
# Takes the daemon lock, exports its file descriptor for a later re-exec,
# calls init(), detaches from the terminal (unless $debug is set or we are
# restarted), writes the pid file, installs the signal handlers and then
# either supervises the worker pool (max_workers set) or calls run().
#
# Ends in exit_daemon() or restart_daemon(); errors during setup are thrown
# with die.
my $server_run = sub {
    my ($self, $debug) = @_;

    # fixme: handle restart lockfd
    $lockpidfile->($self);

    # remove FD_CLOEXEC bit to reuse on exec
    $self->{daemon_lock_fh}->fcntl(Fcntl::F_SETFD(), 0);

    $ENV{PVE_DAEMON_LOCK_FD} = $self->{daemon_lock_fh}->fileno;

    $self->{debug} = 1 if $debug;

    $self->init();

    if (!$debug) {
        # Note: use 'or' here - with '||' the die() gets attached to the
        # (always true) file name and open errors are silently ignored
        open(STDIN, '<', '/dev/null') or die "can't read /dev/null";
        open(STDOUT, '>', '/dev/null') or die "can't write /dev/null";
    }

    # run in background
    if (!$self->{env_restart_pve_daemon} && !$debug) {
        PVE::INotify::inotify_close();
        my $spid = fork();
        if (!defined($spid)) {
            die "can't put server into background - fork failed";
        } elsif ($spid) { # parent
            exit(0);
        }
        PVE::INotify::inotify_init();
    }

    if ($self->{env_restart_pve_daemon}) {
        syslog('info', "restarting server");
    } else {
        $writepidfile->($self);
        syslog('info', "starting server");
    }

    POSIX::setsid();

    open(STDERR, '>&', \*STDOUT) or die "can't close STDERR\n";

    # Previously installed handlers are chained, but only if they are
    # real code references (strings like 'DEFAULT' or 'IGNORE' cannot be
    # called).
    my $call_old_handler = sub {
        my ($old_handler, @args) = @_;
        $old_handler->(@args) if ref($old_handler) eq 'CODE';
    };

    my $old_sig_term = $SIG{TERM};
    local $SIG{TERM} = sub {
        local ($@, $!, $?); # do not overwrite error vars
        syslog('info', "received signal TERM");
        $terminate_server->($self);
        $server_cleanup->($self);
        $call_old_handler->($old_sig_term, @_);
    };

    my $old_sig_quit = $SIG{QUIT};
    local $SIG{QUIT} = sub {
        local ($@, $!, $?); # do not overwrite error vars
        syslog('info', "received signal QUIT");
        $terminate_server->($self);
        $server_cleanup->($self);
        $call_old_handler->($old_sig_quit, @_);
    };

    my $old_sig_int = $SIG{INT};
    local $SIG{INT} = sub {
        local ($@, $!, $?); # do not overwrite error vars
        syslog('info', "received signal INT");
        $SIG{INT} = 'DEFAULT'; # allow to terminate now
        $terminate_server->($self);
        $server_cleanup->($self);
        $call_old_handler->($old_sig_int, @_);
    };

    $SIG{HUP} = sub {
        local ($@, $!, $?); # do not overwrite error vars
        syslog('info', "received signal HUP");
        $self->{got_hup_signal} = 1;
        if ($self->{max_workers}) {
            # ends the supervisor loop below, which then restarts us
            $terminate_server->($self);
        } elsif ($self->can('hup')) {
            eval { $self->hup() };
            warn $@ if $@;
        }
    };

    eval {
        if ($self->{max_workers}) {
            my $old_sig_chld = $SIG{CHLD};
            local $SIG{CHLD} = sub {
                local ($@, $!, $?); # do not overwrite error vars
                $finish_workers->($self);
                $call_old_handler->($old_sig_chld, @_);
            };

            for (;;) { # forever
                $start_workers->($self);
                sleep(5);
                $finish_workers->($self);
                last if $self->{terminate};
            }

        } else {
            $self->run();
        }
    };
    my $err = $@;

    if ($err) {
        # pass the text as argument, not as format string
        syslog('err', "%s", "ERROR: $err");

        # fixme: kill all workers

        if (my $wait_time = $self->{restart_on_error}) {
            $self->restart_daemon($wait_time);
        } else {
            $self->exit_daemon(-1);
        }
    }

    if ($self->{got_hup_signal}) {
        $self->restart_daemon();
    } else {
        $self->exit_daemon(0);
    }
};
359
# Constructor.
#
# Parameters:
#   $this    - class name (or an object, whose class is used)
#   $name    - daemon name; used for syslog, the process title and the pid
#              file name ("<run_dir>/<name>.pid")
#   $cmdline - array reference with the command line used to re-exec the
#              daemon on restart
#   %params  - options: restart_on_error, stop_wait_time, run_dir,
#              max_workers, leave_children_open_on_reload
#
# The restart state is taken from (and removed from) the environment:
# RESTART_PVE_DAEMON and PVE_DAEMON_LOCK_FD; PVE_DAEMON_WORKER_PIDS is read
# when restarting with workers.
#
# Any error (not root, second instance, unknown option, ...) is logged and
# terminates the process with exit(-1).
sub new {
    my ($this, $name, $cmdline, %params) = @_;

    $name = 'daemon' if !$name; # should not happen

    initlog($name);

    my $self;

    eval {

        my $restart = delete $ENV{RESTART_PVE_DAEMON};

        my $lockfd = delete $ENV{PVE_DAEMON_LOCK_FD};

        if (defined($lockfd)) {
            die "unable to parse lock fd '$lockfd'\n"
                if $lockfd !~ m/^(\d+)$/;
            $lockfd = $1; # untaint
        }

        die "please run as root\n" if !$restart && ($> != 0);

        die "can't create more that one PVE::Daemon" if $daemon_initialized;
        $daemon_initialized = 1;

        PVE::INotify::inotify_init();

        my $class = ref($this) || $this;

        $self = bless {
            name => $name,
            run_dir => '/var/run',
            env_restart_pve_daemon => $restart,
            env_pve_lock_fd => $lockfd,
            workers => {},
            old_workers => {},
        }, $class;

        my %known_option = map { $_ => 1 } qw(
            restart_on_error
            stop_wait_time
            run_dir
            max_workers
            leave_children_open_on_reload
        );

        foreach my $opt (keys %params) {
            die "unknown daemon option '$opt'\n" if !$known_option{$opt};
            $self->{$opt} = $params{$opt};
        }

        if ($restart && $self->{max_workers}) {
            # take over the workers started before the re-exec
            if (my $wpids = $ENV{PVE_DAEMON_WORKER_PIDS}) {
                foreach my $pid (split(':', $wpids)) {
                    if ($pid =~ m/^(\d+)$/) {
                        $self->{old_workers}->{$1} = 1;
                    }
                }
            }
        }

        $self->{pidfile} = "$self->{run_dir}/${name}.pid";

        $self->{nodename} = PVE::INotify::nodename();

        $self->{cmdline} = [];

        foreach my $el (@$cmdline) {
            # untaint - the /s modifier makes sure this matches any
            # string (including embedded newlines), and $1 is only used
            # after a successful match, so it can never be a stale capture
            if ($el =~ m/^(.*)$/s) {
                push @{$self->{cmdline}}, $1;
            }
        }

        $0 = $name;
    };
    if (my $err = $@) {
        $log_err->($err);
        exit(-1);
    }

    return $self;
}
448
# Terminate the daemon process: log the stop, remove the pid and lock
# files, and exit with the given $status. Does not return.
sub exit_daemon {
    my ($self, $status) = @_;

    syslog("info", "server stopped");

    $server_cleanup->($self);

    exit($status);
}
458
# Restart the daemon by re-executing its original command line.
#
# Parameters:
#   $waittime - optional number of seconds to sleep before the exec
#
# Sets RESTART_PVE_DAEMON in the environment, and, when workers are used,
# passes the PIDs of all known workers (current and old) to the new process
# in PVE_DAEMON_WORKER_PIDS. Does not return: if the exec fails the reason
# is logged and the process exits with -1.
sub restart_daemon {
    my ($self, $waittime) = @_;

    syslog('info', "server shutdown (restart)");

    $ENV{RESTART_PVE_DAEMON} = 1;

    if ($self->{max_workers}) {
        my @workers = keys %{$self->{workers}};
        push @workers, keys %{$self->{old_workers}};
        $ENV{PVE_DAEMON_WORKER_PIDS} = join(':', @workers);
    }

    sleep($waittime) if $waittime; # avoid high server load due to restarts

    PVE::INotify::inotify_close();

    my @cmd = @{$self->{cmdline}};

    # exec only returns on failure - do not fail silently
    exec(@cmd)
        or syslog('err', "%s", "unable to restart server - exec '" . join(' ', @cmd) . "' failed: $!");

    exit(-1);
}
480
# Initialization hook - please overwrite in subclass.
#
# Called once at startup, after the daemon lock was acquired and before
# the process forks into the background. The default implementation
# does nothing.
sub init {
    my ($self) = @_;

}
487
# Shutdown hook - please overwrite in subclass.
#
# The default implementation logs the shutdown. For a simple daemon
# (max_workers not set) it also reaps all children that have already
# exited, without waiting for the ones still running.
sub shutdown {
    my ($self) = @_;

    syslog('info', "server closing");

    if (!$self->{max_workers}) {
        # wait for children
        while (waitpid(-1, POSIX::WNOHANG()) > 0) {
            # nothing to do - we only collect finished children
        }
    }
}
499
500 # please define in subclass
501 #sub hup {
502 # my ($self) = @_;
503 #
504 # syslog('info' , "received signal HUP (restart)");
505 #}
506
# Main loop of the daemon (or of each worker) - please overwrite in
# subclass.
#
# The default implementation never returns; it only writes a log message
# every 5 seconds.
sub run {
    my ($self) = @_;

    while (1) {
        syslog('info', "server is running");
        sleep(5);
    }
}
516
# Start the daemon.
#
# Parameters:
#   $debug - when true, run in debug mode (passed on to the server loop)
#
# If the server fails with an error, that error is reported on STDERR and
# in the syslog, and the process exits with -1.
sub start {
    my ($self, $debug) = @_;

    eval { $server_run->($self, $debug) };

    if (my $failure = $@) {
        $log_err->("start failed - $failure");
        exit(-1);
    }
}
526
# Read the daemon PID from the first line of $self->{pidfile}.
#
# Returns the PID as integer, or 0 if there is no first line or it does
# not consist of digits only.
my $read_pid = sub {
    my ($self) = @_;

    my $first_line = PVE::Tools::file_read_firstline($self->{pidfile});

    # the capture also untaints the value
    if ($first_line && $first_line =~ m/^(\d+)$/) {
        return int($1);
    }

    return 0;
};
540
# Check whether the daemon recorded in the pid file is running.
#
# In list context returns ($running, $pid), in scalar context only
# $running (1 or 0). Without a usable pid file the result is (0, 0).
sub running {
    my ($self) = @_;

    my ($alive, $found_pid) = (0, 0);

    if (my $pid = $read_pid->($self)) {
        $found_pid = $pid;
        $alive = PVE::ProcFSTools::check_process_running($pid) ? 1 : 0;
    }

    return wantarray ? ($alive, $found_pid) : $alive;
}
553
# Stop the running daemon.
#
# Reads the PID from the pid file and returns immediately if there is none.
# A running daemon gets the TERM signal and up to 'stop_wait_time' seconds
# (default 5) to exit; only if it is still running after that time it gets
# the KILL signal. Finally, if the pid file still exists, the daemon lock is
# taken and the pid and lock files are removed; problems during that cleanup
# are logged, not thrown.
sub stop {
    my ($self) = @_;

    my $pid = $read_pid->($self);

    return if !$pid;

    if (PVE::ProcFSTools::check_process_running($pid)) {
        kill(15, $pid); # send TERM signal
        # give some time
        my $wait_time = $self->{stop_wait_time} || 5;
        my $running = 1;
        for (my $i = 0; $i < $wait_time; $i++) {
            $running = PVE::ProcFSTools::check_process_running($pid);
            last if !$running;
            sleep(1);
        }

        # the process may have exited during the last sleep
        $running = PVE::ProcFSTools::check_process_running($pid) if $running;

        if ($running) {
            syslog('err', "server still running - send KILL");
            # only signal a process we know is still there - the PID of
            # an exited process may already belong to something else
            kill(9, $pid);
        }

        waitpid($pid, 0);
    }

    if (-f $self->{pidfile}) {
        eval {
            # try to get the lock
            $lockpidfile->($self);
            $server_cleanup->($self);
        };
        if (my $err = $@) {
            $log_err->("cleanup failed - $err");
        }
    }
}
590
# Register the 'start' command (POST) with the class of $self.
#
# Parameters:
#   $description - optional command description
#                  (default: "Start the daemon.")
#
# The command accepts the optional boolean parameter 'debug' and calls
# $self->start() with it.
sub register_start_command {
    my ($self, $description) = @_;

    my $class = ref($self);

    my $debug_property = {
        description => "Debug mode - stay in foreground",
        type => "boolean",
        optional => 1,
        default => 0,
    };

    my $handler = sub {
        my ($param) = @_;

        $self->start($param->{debug});

        return undef;
    };

    $class->register_method({
        name => 'start',
        path => 'start',
        method => 'POST',
        description => $description || "Start the daemon.",
        parameters => {
            additionalProperties => 0,
            properties => {
                debug => $debug_property,
            },
        },
        returns => { type => 'null' },
        code => $handler,
    });
}
622
# Restart or reload the daemon.
#
# Parameters:
#   $use_hup - when true, a running daemon is sent the HUP signal;
#              otherwise it is stopped and started again
#
# If this process is itself a re-executed daemon (env_restart_pve_daemon
# is set), or if no daemon is running, the daemon is simply started.
my $reload_daemon = sub {
    my ($self, $use_hup) = @_;

    if ($self->{env_restart_pve_daemon}) {
        $self->start();
    } else {
        my ($running, $pid) = $self->running();

        if (!$running) {
            $self->start();
        } elsif ($use_hup) {
            syslog('info', "send HUP to $pid");
            kill(1, $pid);
        } else {
            $self->stop();
            $self->start();
        }
    }
};
643
# Register the 'restart' command (POST) with the class of $self.
#
# Parameters:
#   $use_hup     - when true, a running daemon is restarted by sending
#                  HUP instead of stop/start
#   $description - optional command description
#
# The command takes no parameters.
sub register_restart_command {
    my ($self, $use_hup, $description) = @_;

    my $class = ref($self);

    my $handler = sub {
        my ($param) = @_;

        $reload_daemon->($self, $use_hup);

        return undef;
    };

    $class->register_method({
        name => 'restart',
        path => 'restart',
        method => 'POST',
        description => $description || "Restart the daemon (or start if not running).",
        parameters => {
            additionalProperties => 0,
            properties => {},
        },
        returns => { type => 'null' },
        code => $handler,
    });
}
668
# Register the 'reload' command (POST) with the class of $self.
#
# Parameters:
#   $description - optional command description
#
# The command takes no parameters. A running daemon is always reloaded by
# sending it the HUP signal.
sub register_reload_command {
    my ($self, $description) = @_;

    my $class = ref($self);

    my $handler = sub {
        my ($param) = @_;

        $reload_daemon->($self, 1);

        return undef;
    };

    $class->register_method({
        name => 'reload',
        path => 'reload',
        method => 'POST',
        description => $description || "Reload daemon configuration (or start if not running).",
        parameters => {
            additionalProperties => 0,
            properties => {},
        },
        returns => { type => 'null' },
        code => $handler,
    });
}
693
# Register the 'stop' command (POST) with the class of $self.
#
# Parameters:
#   $description - optional command description
#                  (default: "Stop the daemon.")
#
# The command takes no parameters and calls $self->stop().
sub register_stop_command {
    my ($self, $description) = @_;

    my $class = ref($self);

    my $handler = sub {
        my ($param) = @_;

        $self->stop();

        return undef;
    };

    $class->register_method({
        name => 'stop',
        path => 'stop',
        method => 'POST',
        description => $description || "Stop the daemon.",
        parameters => {
            additionalProperties => 0,
            properties => {},
        },
        returns => { type => 'null' },
        code => $handler,
    });
}
718
# Register the 'status' command (GET) with the class of $self.
#
# Parameters:
#   $description - optional command description
#                  (default: "Get daemon status.")
#
# The command takes no parameters and returns the string 'running' or
# 'stopped', depending on $self->running().
sub register_status_command {
    my ($self, $description) = @_;

    my $class = ref($self);

    $class->register_method({
        name => 'status',
        path => 'status',
        method => 'GET',
        # honor the caller supplied text, like all other register_*_command
        # methods do
        description => $description || "Get daemon status.",
        parameters => {
            additionalProperties => 0,
            properties => {},
        },
        returns => {
            type => 'string',
            enum => ['stopped', 'running'],
        },
        code => sub {
            my ($param) = @_;

            return $self->running() ? 'running' : 'stopped';
        }});
}
743
744 1;
745