]> git.proxmox.com Git - pve-common.git/blame - src/PVE/RESTEnvironment.pm
allow workers to count warnings and finish tasks in a WARNINGS state
[pve-common.git] / src / PVE / RESTEnvironment.pm
CommitLineData
d9072797
DM
1package PVE::RESTEnvironment;
2
# NOTE: you can/should provide your own specialized class, and
# use this as a base class (as example see PVE::RPCEnvironment).
5
6# we use this singleton class to pass RPC related environment values
7
8use strict;
9use warnings;
10use POSIX qw(:sys_wait_h EINTR);
11use IO::Handle;
12use IO::File;
13use IO::Select;
14use Fcntl qw(:flock);
15use PVE::Exception qw(raise raise_perm_exc);
16use PVE::SafeSyslog;
17use PVE::Tools;
18use PVE::INotify;
19use PVE::ProcFSTools;
20
21
22my $rest_env;
23
# safe $SIG{CHLD} handler implementation.
# simply set $SIG{CHLD} = $worker_reaper;
# and register forked processes with &$register_worker(pid)
# Note: using $SIG{CHLD} = 'IGNORE' or $SIG{CHLD} = sub { wait (); } or ...
# has serious side effects, because perl's built in system() and open()
# functions can't get the correct exit status of a child. So we can't use
# that (also see perlipc)
31
32my $WORKER_PIDS;
33my $WORKER_FLAG = 0;
34
# Log the end of a task and refresh/broadcast the list of active workers.
#
# Parameters:
#   $upid   - UPID of the task that ended
#   $user   - user name passed on to log_cluster_msg()
#   $status - wait status of the worker process (as found in $?)
#
# Does nothing when the singleton environment has not been initialized.
my $log_task_result = sub {
    my ($upid, $user, $status) = @_;

    return if !$rest_env;

    # default message/priority, derived from the wait status
    my $msg = 'successful';
    my $pri = 'info';
    if ($status != 0) {
        my $ec = $status >> 8;  # exit code
        my $ic = $status & 255; # low byte of the wait status
        $msg = $ec ? "failed ($ec)" : "interrupted ($ic)";
        $pri = 'err';
    }

    my $tlist = $rest_env->active_workers($upid);
    eval { $rest_env->broadcast_tasklist($tlist); };
    # pass the error as an argument, never as the format string - it may
    # contain '%' characters
    syslog('err', "%s", $@) if $@;

    # prefer the status recorded in the task list, if there is one
    my $task;
    foreach my $t (@$tlist) {
        if ($t->{upid} eq $upid) {
            $task = $t;
            last;
        }
    }
    if ($task && $task->{status}) {
        $msg = $task->{status};
    }

    $rest_env->log_cluster_msg($pri, $user, "end task $upid $msg");
};
66
# SIGCHLD handler: reap every registered worker that has exited and log
# its result. Only pids stored in $WORKER_PIDS are waited for.
my $worker_reaper = sub {
    # do not clobber errno / child status of the interrupted code
    local $!;
    local $?;

    for my $pid (keys %$WORKER_PIDS) {
        my $reaped = waitpid($pid, WNOHANG);
        next if !defined($reaped) || $reaped != $pid;

        my $info = $WORKER_PIDS->{$pid};
        if ($info && $info->{upid} && $info->{user}) {
            $log_task_result->($info->{upid}, $info->{user}, $?);
        }
        delete $WORKER_PIDS->{$pid};
    }
};
80
# Remember a forked worker so that $worker_reaper can collect it later.
#
# Parameters:
#   $pid  - pid of the forked worker (nothing is done if false)
#   $user - user stored along with the pid
#   $upid - UPID stored along with the pid
my $register_worker = sub {
    my ($pid, $user, $upid) = @_;

    return if !$pid;

    # do not register if already finished
    my $reaped = waitpid($pid, WNOHANG);
    if (defined($reaped) && $reaped == $pid) {
        delete $WORKER_PIDS->{$pid};
        return;
    }

    $WORKER_PIDS->{$pid} = { user => $user, upid => $upid };
};
98
# initialize environment - must be called once at program startup
#
# Parameters:
#   $type   - environment type, one of:
#               cli  ... command started from command line
#               pub  ... access from public server (pveproxy)
#               priv ... access from private server (pvedaemon)
#               ha   ... access from HA resource manager agent (pve-ha-manager)
#   %params - optional settings; only 'atfork' is accepted
#
# Installs $worker_reaper as $SIG{CHLD} handler, stores the new object as
# the singleton and returns it. Dies if called twice, on an unknown type
# or on an unknown option.
sub init {
    my ($class, $type, %params) = @_;

    $class = ref($class) || $class;

    die "already initialized" if $rest_env;

    die "unknown environment type"
        if !$type || $type !~ m/^(cli|pub|priv|ha)$/;

    $SIG{CHLD} = $worker_reaper;

    my $self = bless { type => $type, warning_count => 0 }, $class;

    for my $opt (keys %params) {
        die "unknown option '$opt'" if $opt ne 'atfork';
        $self->{$opt} = $params{$opt};
    }

    $rest_env = $self;

    my $nodename = (POSIX::uname())[1];
    $nodename =~ s/\..*$//; # strip domain part, if any
    $self->{nodename} = $nodename;

    return $self;
}
143
# convenience function for command line tools
#
# Initializes inotify, creates a 'cli' environment, starts a request and
# sets language (from $ENV{LANG}) and user. $username defaults to
# 'root@pam'; in that case the effective uid must be 0, otherwise we die.
sub setup_default_cli_env {
    my ($class, $username) = @_;

    $class = ref($class) || $class;
    $username //= 'root@pam';

    PVE::INotify::inotify_init();

    my $env = $class->init('cli');
    $env->init_request();
    $env->set_language($ENV{LANG});
    $env->set_user($username);

    die "please run as root\n"
        if ($username eq 'root@pam') && ($> != 0);
}
162
# get the singleton - dies if init() was not called before
sub get {
    return $rest_env if $rest_env;

    die "REST environment not initialized";
}
170
# Store the client IP in the environment.
sub set_client_ip {
    my $self = shift;
    my ($ip) = @_;

    return $self->{client_ip} = $ip;
}
176
# Return the client IP stored by set_client_ip() (undef if never set).
sub get_client_ip {
    my $self = shift;

    return $self->{client_ip};
}
182
# Store a key/value pair in the result attributes of the environment.
sub set_result_attrib {
    my $self = shift;
    my ($key, $value) = @_;

    return $self->{result_attributes}->{$key} = $value;
}
188
# Return the result attribute stored under $key (undef if there is none).
sub get_result_attrib {
    my $self = shift;
    my ($key) = @_;

    return $self->{result_attributes}->{$key};
}
194
# Store the language in the environment.
sub set_language {
    my $self = shift;
    my ($lang) = @_;

    # fixme: initialize I18N

    return $self->{language} = $lang;
}
202
# Return the language stored by set_language() (undef if never set).
sub get_language {
    my $self = shift;

    return $self->{language};
}
208
# Store the user name in the environment.
sub set_user {
    my $self = shift;
    my ($user) = @_;

    return $self->{user} = $user;
}
214
# Return the user name stored by set_user().
#
# Dies with "user name not set" when no user is defined, unless $noerr is
# true - then the (undefined) value is returned instead.
sub get_user {
    my ($self, $noerr) = @_;

    my $user = $self->{user};
    die "user name not set\n" if !defined($user) && !$noerr;

    return $user;
}
222
61aca93a
WB
# Store the u2f challenge in the environment.
sub set_u2f_challenge {
    my $self = shift;
    my ($challenge) = @_;

    return $self->{u2f_challenge} = $challenge;
}
228
# Return the u2f challenge stored by set_u2f_challenge().
#
# Dies with "no active u2f challenge" when none is defined, unless $noerr
# is true - then the (undefined) value is returned instead.
sub get_u2f_challenge {
    my ($self, $noerr) = @_;

    my $challenge = $self->{u2f_challenge};
    die "no active u2f challenge\n" if !defined($challenge) && !$noerr;

    return $challenge;
}
236
c7a7aa4d
WB
# Store the request host in the environment.
sub set_request_host {
    my $self = shift;
    my ($host) = @_;

    return $self->{request_host} = $host;
}
242
# Return the request host stored by set_request_host().
#
# Dies with "no hostname available in current environment" when none is
# defined, unless $noerr is true - then the (undefined) value is returned.
sub get_request_host {
    my ($self, $noerr) = @_;

    my $host = $self->{request_host};
    die "no hostname available in current environment\n"
        if !defined($host) && !$noerr;

    return $host;
}
250
d9072797
DM
# Return the worker flag: 0 initially, set to 1 in the child process
# forked by fork_worker(). The invocant is not used.
sub is_worker {
    return $WORKER_FLAG;
}
256
# read/update list of active workers
# we move all finished tasks to the archive index,
# but keep active and most recent tasks in the active file.
#
# Parameters:
#   $new_upid - optional UPID to add to the list if not already present
#   $nocheck  - consider $new_upid still running (avoids that we try to
#               read the result too early)
#
# Returns the new task list (array ref). Runs under the lock file
# /var/log/pve/tasks/.active.lock and dies if lock_file() reports an error.
sub active_workers {
    my ($self, $new_upid, $nocheck) = @_;

    my $lkfn = "/var/log/pve/tasks/.active.lock";

    my $timeout = 10;

    my $code = sub {

        my $tasklist = PVE::INotify::read_file('active');

        my @ta;          # tasks that are no longer running
        my $tlist = [];  # tasks considered running (result list)
        my $thash = {};  # only list task once

        # sort a task into $tlist (running) or @ta (finished)
        my $check_task = sub {
            my ($task, $running) = @_;

            if ($running || PVE::ProcFSTools::check_process_running($task->{pid}, $task->{pstart})) {
                push @$tlist, $task;
            } else {
                delete $task->{pid};
                push @ta, $task;
            }
            delete $task->{pstart};
        };

        foreach my $task (@$tasklist) {
            my $upid = $task->{upid};
            next if $thash->{$upid};
            $thash->{$upid} = $task;
            $check_task->($task);
        }

        if ($new_upid && !$thash->{$new_upid}) {
            my $task = PVE::Tools::upid_decode($new_upid);
            $task->{upid} = $new_upid;
            $thash->{$new_upid} = $task;
            $check_task->($task, $nocheck);
        }

        # most recently started first
        @ta = sort { $b->{starttime} <=> $a->{starttime} } @ta;

        my $save = defined($new_upid);

        # record end time and status for tasks that just finished
        foreach my $task (@ta) {
            next if $task->{endtime};
            $task->{endtime} = time();
            $task->{status} = PVE::Tools::upid_read_status($task->{upid});
            $save = 1;
        }

        # collect index lines for all finished tasks not archived yet
        my $archive = '';
        my @arlist = ();
        foreach my $task (@ta) {
            if (!$task->{saved}) {
                $archive .= sprintf("%s %08X %s\n", $task->{upid}, $task->{endtime}, $task->{status});
                $save = 1;
                push @arlist, $task;
                $task->{saved} = 1;
            }
        }

        if ($archive) {
            my $size = 0;
            my $filename = "/var/log/pve/tasks/index";
            eval {
                my $fh = IO::File->new($filename, '>>', 0644) ||
                    die "unable to open file '$filename' - $!\n";
                PVE::Tools::safe_print($filename, $fh, $archive);
                $size = -s $fh;
                close($fh) ||
                    die "unable to close file '$filename' - $!\n";
            };
            my $err = $@;
            if ($err) {
                # pass the error as an argument, never as the format
                # string - it may contain '%' characters
                syslog('err', "%s", $err);
                foreach my $task (@arlist) { # mark as not saved
                    $task->{saved} = 0;
                }
            }
            my $maxsize = 50000; # about 1000 entries
            if ($size > $maxsize) {
                rename($filename, "$filename.1");
            }
        }

        # we try to reduce the amount of data
        # list all running tasks and a few others
        # try to limit to 25 tasks
        my $max = 25 - scalar(@$tlist);
        foreach my $task (@ta) {
            last if $max <= 0;
            push @$tlist, $task;
            $max--;
        }

        PVE::INotify::write_file('active', $tlist) if $save;

        return $tlist;
    };

    my $res = PVE::Tools::lock_file($lkfn, $timeout, $code);
    die $@ if $@;

    return $res;
}
370
# Terminate the process group led by $pid.
#
# Sends signal 15 (TERM) to the group, then polls once per second - at most
# five times - whether the process ($pid, $pstart) is still running, and
# finally sends signal 9 (KILL) to the group if it is.
my $kill_process_group = sub {
    my ($pid, $pstart) = @_;

    # a negative pid addresses the whole process group
    my $pgrp = -$pid;

    # always send signal to all pgrp members
    kill(15, $pgrp); # send TERM signal

    # give max 5 seconds to shut down
    for my $attempt (1 .. 5) {
        return if !PVE::ProcFSTools::check_process_running($pid, $pstart);
        sleep(1);
    }

    # to be sure
    kill(9, $pgrp);
};
389
# Check whether the worker identified by $upid is still running.
#
# Parameters:
#   $upid   - UPID of the worker, decoded with PVE::Tools::upid_decode()
#   $killit - if true, a running worker's process group is killed
#
# Returns 1 if the worker is running (and was not asked to be killed),
# 0 otherwise.
sub check_worker {
    my ($self, $upid, $killit) = @_;

    my $task = PVE::Tools::upid_decode($upid);

    my ($pid, $pstart) = ($task->{pid}, $task->{pstart});

    return 0 if !PVE::ProcFSTools::check_process_running($pid, $pstart);

    if ($killit) {
        # pass the start time too, so the wait loop checks the very same
        # process we just verified above and not merely the pid
        $kill_process_group->($pid, $pstart);
        return 0;
    }

    return 1;
}
406
3e2da216
TL
# acts almost as tee: writes an output both to STDOUT and a task log,
# we differ as we're worker aware and look also at the child's control pipe,
# so we know if the function could be executed successfully or not.
#
# Parameters:
#   $childfd - file descriptor to read the worker output from
#   $ctrlfd  - file descriptor to read the final control message from
#   $taskfh  - file handle of the task log
#   $cpid    - pid of the worker; INT/QUIT/TERM are forwarded to its
#              process group
#
# Both descriptors are closed before returning. Errors are not thrown but
# printed to STDERR and written to the task log as "TASK ERROR: ...".
my $tee_worker = sub {
    my ($childfd, $ctrlfd, $taskfh, $cpid) = @_;

    eval {
        my $int_count = 0;
        local $SIG{INT} = local $SIG{QUIT} = local $SIG{TERM} = sub {
            # always send signal to all pgrp members
            my $kpid = -$cpid;
            if ($int_count < 3) {
                kill(15, $kpid); # send TERM signal
            } else {
                kill(9, $kpid); # send KILL signal
            }
            $int_count++;
        };
        local $SIG{PIPE} = sub { die "broken pipe\n"; };

        my $select = IO::Select->new();
        my $fh = IO::Handle->new_from_fd($childfd, 'r');
        $select->add($fh);

        my $readbuf = '';
        while ($select->count) {
            my @handles = $select->can_read(1);
            if (scalar(@handles)) {
                my $count = sysread($handles[0], $readbuf, 4096);
                if (!defined($count)) {
                    # interrupted by one of our own signal handlers - retry
                    next if $!{EINTR};
                    my $err = $!;
                    die "sync pipe read error: $err\n";
                }
                last if $count == 0; # eof

                # print to (and flush) the currently selected output handle
                print $readbuf;
                select()->flush();

                print $taskfh $readbuf;
                $taskfh->flush();
            } else {
                # some commands daemonize without closing stdout
                last if !PVE::ProcFSTools::check_process_running($cpid);
            }
        }

        # the worker reports its final state on the control pipe
        POSIX::read($ctrlfd, $readbuf, 4096);
        if ($readbuf =~ m/^TASK OK\n?$/) {
            # skip printing to stdout
            print $taskfh $readbuf;
        } elsif ($readbuf =~ m/^TASK ERROR: (.*)\n?$/) {
            print STDERR "$1\n";
            print $taskfh "\n$readbuf"; # ensure start on new line for webUI
        } elsif ($readbuf =~ m/^TASK WARNINGS: (\d+)\n?$/) {
            print STDERR "Task finished with $1 warning(s)!\n";
            print $taskfh "\n$readbuf"; # ensure start on new line for webUI
        } else {
            die "got unexpected control message: $readbuf\n";
        }
        $taskfh->flush();
    };
    my $err = $@;

    POSIX::close($childfd);
    POSIX::close($ctrlfd);

    if ($err) {
        $err =~ s/\n/ /mg;
        print STDERR "$err\n";
        print $taskfh "TASK ERROR: $err\n";
    }
};
480
d9072797
DM
# start long running workers
# STDIN is redirected to /dev/null
# STDOUT,STDERR are redirected to the filename returned by upid_decode
# NOTE: we simulate running in foreground if ($self->{type} eq 'cli')
#
# Parameters:
#   $dtype      - task type stored in the UPID (default 'unknown')
#   $id         - task id stored in the UPID (default '')
#   $user       - user stored in the UPID (default 'root@pve')
#   $function   - code ref executed in the worker, called with the UPID
#   $background - if true, do not wait for the worker even in 'cli' mode
#
# Returns the UPID in scalar context and ($upid, $wait_status) in list
# context; the wait status is only meaningful for synchronous workers.
# Dies if the worker cannot be forked or set up.
sub fork_worker {
    my ($self, $dtype, $id, $user, $function, $background) = @_;

    $dtype = 'unknown' if !defined ($dtype);
    $id = '' if !defined ($id);

    $user = 'root@pve' if !defined ($user);

    # synchronous mode: the parent tees the worker output and waits for it
    my $sync = ($self->{type} eq 'cli' && !$background) ? 1 : 0;

    local $SIG{INT} =
        local $SIG{QUIT} =
        local $SIG{PIPE} =
        local $SIG{TERM} = 'IGNORE';

    my $starttime = time ();

    my @psync = POSIX::pipe(); # child -> parent
    my @csync = POSIX::pipe(); # parent -> child
    # control pipe for the final task state, only used in sync mode
    # (no 'my ... if ...' here - its behaviour is undefined, see perlsyn)
    my @ctrlfd = $sync ? POSIX::pipe() : ();

    my $node = $self->{nodename};

    my $cpid = fork();
    die "unable to fork worker - $!" if !defined($cpid);

    # pid of the worker, seen from both parent and child
    my $workerpuid = $cpid ? $cpid : $$;

    my $pstart = PVE::ProcFSTools::read_proc_starttime($workerpuid) ||
        die "unable to read process start time";

    my $upid = PVE::Tools::upid_encode ({
        node => $node, pid => $workerpuid, pstart => $pstart,
        starttime => $starttime, type => $dtype, id => $id, user => $user });

    my $outfh;

    if (!$cpid) { # child

        $0 = "task $upid";
        $WORKER_FLAG = 1;

        $SIG{INT} = $SIG{QUIT} = $SIG{TERM} = sub { die "received interrupt\n"; };

        $SIG{CHLD} = $SIG{PIPE} = 'DEFAULT';
        $SIG{TTOU} = 'IGNORE';

        my $ppgid;
        # set session/process group allows to kill the process group
        if ($sync && -t STDIN) {
            # some sync'ed workers operate on the tty but setsid sessions lose
            # the tty, so just create a new pgroup and give it the tty
            $ppgid = POSIX::getpgrp() or die "failed to get old pgid: $!\n";
            POSIX::setpgid(0, 0) or die "failed to setpgid: $!\n";
            POSIX::tcsetpgrp(fileno(STDIN), $$) or die "failed to tcsetpgrp: $!\n";
        } else {
            POSIX::setsid();
        }

        POSIX::close ($psync[0]);
        POSIX::close ($ctrlfd[0]) if $sync;
        POSIX::close ($csync[1]);

        $outfh = $sync ? $psync[1] : undef;
        my $resfh = $sync ? $ctrlfd[1] : undef;

        eval {
            PVE::INotify::inotify_close();

            if (my $atfork = $self->{atfork}) {
                $atfork->();
            }

            # same algorithm as used inside SA
            # STDIN = /dev/null
            my $fd = fileno (STDIN);

            if (!$sync) {
                close STDIN;
                POSIX::close(0) if $fd != 0;

                die "unable to redirect STDIN - $!"
                    if !open(STDIN, "</dev/null");

                # async workers write output and result to the task log
                $outfh = PVE::Tools::upid_open($upid);
                $resfh = fileno($outfh);
            }


            # redirect STDOUT
            $fd = fileno(STDOUT);
            close STDOUT;
            POSIX::close (1) if $fd != 1;

            die "unable to redirect STDOUT - $!"
                if !open(STDOUT, ">&", $outfh);

            STDOUT->autoflush (1);

            # redirect STDERR to STDOUT
            $fd = fileno (STDERR);
            close STDERR;
            POSIX::close(2) if $fd != 2;

            die "unable to redirect STDERR - $!"
                if !open(STDERR, ">&1");

            STDERR->autoflush(1);
        };
        if (my $err = $@) {
            my $msg = "ERROR: $err";
            POSIX::write($psync[1], $msg, length ($msg));
            POSIX::close($psync[1]);
            POSIX::_exit(1);
            kill(-9, $$);
        }

        # sync with parent (signal that we are ready)
        POSIX::write($psync[1], $upid, length ($upid));
        POSIX::close($psync[1]) if !$sync; # don't need output pipe if async

        eval {
            my $readbuf = '';
            # sync with parent (wait until parent is ready)
            POSIX::read($csync[0], $readbuf, 4096);
            die "parent setup error\n" if $readbuf ne 'OK';

            if ($self->{type} eq 'ha') {
                print "task started by HA resource agent\n";
            }
            $function->($upid);
        };
        my ($msg, $exitcode);
        my $err = $@;
        if ($err) {
            chomp $err;
            $err =~ s/\n/ /mg;
            # pass the error as an argument, never as the format string -
            # it may contain '%' characters
            syslog('err', "%s", $err);
            $msg = "TASK ERROR: $err\n";
            $exitcode = -1;
        } elsif (my $warnings = $self->{warning_count}) {
            $msg = "TASK WARNINGS: $warnings\n";
            $exitcode = 0;
        } else {
            $msg = "TASK OK\n";
            $exitcode = 0;
        }
        POSIX::write($resfh, $msg, length($msg));

        if ($sync) {
            POSIX::close($resfh);
            if ( -t STDIN) {
                # hand the tty back to the process group we took it from
                POSIX::tcsetpgrp(fileno(STDIN), $ppgid) or
                    die "failed to tcsetpgrp to parent: $!\n";
            }
        }
        POSIX::_exit($exitcode);
        kill(-9, $$); # not really needed, just to be sure
    }

    # parent

    POSIX::close ($psync[1]);
    POSIX::close ($ctrlfd[1]) if $sync;
    POSIX::close ($csync[0]);

    my $readbuf = '';
    # sync with child (wait until child starts)
    POSIX::read($psync[0], $readbuf, 4096);

    if (!$sync) {
        POSIX::close($psync[0]);
        $register_worker->($cpid, $user, $upid);
    } else {
        chomp $readbuf;
    }

    eval {
        die "got no worker upid - start worker failed\n" if !$readbuf;

        if ($readbuf =~ m/^ERROR:\s*(.+)$/m) {
            die "starting worker failed: $1\n";
        }

        if ($readbuf ne $upid) {
            die "got strange worker upid ('$readbuf' != '$upid') - start worker failed\n";
        }

        if ($sync) {
            $outfh = PVE::Tools::upid_open($upid);
        }
    };
    my $err = $@;

    if (!$err) {
        my $msg = 'OK';
        POSIX::write($csync[1], $msg, length ($msg));
        POSIX::close($csync[1]);

    } else {
        POSIX::close($csync[1]);
        kill(-9, $cpid); # make sure it gets killed
        die $err;
    }

    $self->log_cluster_msg('info', $user, "starting task $upid");

    my $tlist = $self->active_workers($upid, $sync);
    eval { $self->broadcast_tasklist($tlist); };
    syslog('err', "%s", $@) if $@;

    my $res = 0;

    if ($sync) {

        $tee_worker->($psync[0], $ctrlfd[0], $outfh, $cpid);

        $kill_process_group->($cpid, $pstart); # make sure it gets killed

        close($outfh);

        waitpid($cpid, 0);
        $res = $?;
        $log_task_result->($upid, $user, $res);
    }

    return wantarray ? ($upid, $res) : $upid;
}
713
ff79ee65
FE
# Print "WARN: <message>" to STDERR and increase the warning counter of
# the environment. A trailing newline of $message is removed first.
sub warn {
    my $self = shift;
    my ($message) = @_;

    chomp $message;
    print STDERR "WARN: $message\n";

    return $self->{warning_count}++;
}
723
d9072797
DM
724# Abstract function
725
# Log $msg with priority $pri via syslog. $user is not used here.
sub log_cluster_msg {
    my $self = shift;
    my ($pri, $user, $msg) = @_;

    # PVE::Cluster::log_msg($pri, $user, $msg);

    return syslog($pri, "%s", $msg);
}
733
# Hook called with the current task list; this implementation does nothing
# with it.
sub broadcast_tasklist {
    # PVE::Cluster::broadcast_tasklist($tlist);

    my ($self, $tlist) = @_;
}
739
# Basic permission check for an API call.
#
# Parameters:
#   $perm     - permission description (hash ref, may be undef); only the
#               'user' key is evaluated here
#   $username - name of the calling user (may be undef)
#   $param    - call parameters (not used here)
#
# Returns 1 if access is granted:
#   - no user, and $perm->{user} is 'world'
#   - the user is 'root@pam'
#   - $perm->{user} is 'all'
# In every other case a permission exception is raised.
sub check_api2_permissions {
    my ($self, $perm, $username, $param) = @_;

    # $perm, or its 'user' key, may be missing - avoid comparing undef
    my $perm_user = $perm ? ($perm->{user} // '') : '';

    return 1 if !$username && $perm_user eq 'world';

    raise_perm_exc("user != null") if !$username;

    return 1 if $username eq 'root@pam';

    raise_perm_exc('user != root@pam') if !$perm;

    return 1 if $perm_user eq 'all';

    ##return $self->exec_api2_perm_check($perm->{check}, $username, $param)
    ##if $perm->{check};

    raise_perm_exc();
}
758
# init_request - should be called before each REST/CLI request
#
# Resets the result attributes to an empty hash. %params is accepted but
# not used here.
sub init_request {
    my ($self, %params) = @_;

    # if you need more, implement in subclass

    return $self->{result_attributes} = {};
}
767
7681;