]> git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/build_tools/gnu_parallel
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / rocksdb / build_tools / gnu_parallel
1 #!/usr/bin/env perl
2
3 # Copyright (C) 2007,2008,2009,2010,2011,2012,2013,2014 Ole Tange and
4 # Free Software Foundation, Inc.
5 #
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 3 of the License, or
9 # (at your option) any later version.
10 #
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
15 #
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, see <http://www.gnu.org/licenses/>
18 # or write to the Free Software Foundation, Inc., 51 Franklin St,
19 # Fifth Floor, Boston, MA 02110-1301 USA
20
21 # open3 used in Job::start
22 use IPC::Open3;
23 # &WNOHANG used in reaper
24 use POSIX qw(:sys_wait_h setsid ceil :errno_h);
25 # gensym used in Job::start
26 use Symbol qw(gensym);
27 # tempfile used in Job::start
28 use File::Temp qw(tempfile tempdir);
29 # mkpath used in openresultsfile
30 use File::Path;
31 # GetOptions used in get_options_from_array
32 use Getopt::Long;
33 # Used to ensure code quality
34 use strict;
35 use File::Basename;
36
37 if(not $ENV{HOME}) {
38 # $ENV{HOME} is sometimes not set if called from PHP
39 ::warning("\$HOME not set. Using /tmp\n");
40 $ENV{HOME} = "/tmp";
41 }
42
43 save_stdin_stdout_stderr();
44 save_original_signal_handler();
45 parse_options();
46 ::debug("init", "Open file descriptors: ", join(" ",keys %Global::fd), "\n");
47 my $number_of_args;
48 if($Global::max_number_of_args) {
49 $number_of_args=$Global::max_number_of_args;
50 } elsif ($opt::X or $opt::m or $opt::xargs) {
51 $number_of_args = undef;
52 } else {
53 $number_of_args = 1;
54 }
55
56 my @command;
57 @command = @ARGV;
58
59 my @fhlist;
60 if($opt::pipepart) {
61 @fhlist = map { open_or_exit($_) } "/dev/null";
62 } else {
63 @fhlist = map { open_or_exit($_) } @opt::a;
64 if(not @fhlist and not $opt::pipe) {
65 @fhlist = (*STDIN);
66 }
67 }
68
69 if($opt::skip_first_line) {
70 # Skip the first line for the first file handle
71 my $fh = $fhlist[0];
72 <$fh>;
73 }
74 if($opt::header and not $opt::pipe) {
75 my $fh = $fhlist[0];
76 # split with colsep or \t
77 # $header force $colsep = \t if undef?
78 my $delimiter = $opt::colsep;
79 $delimiter ||= "\$";
80 my $id = 1;
81 for my $fh (@fhlist) {
82 my $line = <$fh>;
83 chomp($line);
84 ::debug("init", "Delimiter: '$delimiter'");
85 for my $s (split /$delimiter/o, $line) {
86 ::debug("init", "Colname: '$s'");
87 # Replace {colname} with {2}
88 # TODO accept configurable short hands
89 # TODO how to deal with headers in {=...=}
90 for(@command) {
91 s:\{$s(|/|//|\.|/\.)\}:\{$id$1\}:g;
92 }
93 $Global::input_source_header{$id} = $s;
94 $id++;
95 }
96 }
97 } else {
98 my $id = 1;
99 for my $fh (@fhlist) {
100 $Global::input_source_header{$id} = $id;
101 $id++;
102 }
103 }
104
105 if($opt::filter_hosts and (@opt::sshlogin or @opt::sshloginfile)) {
106 # Parallel check all hosts are up. Remove hosts that are down
107 filter_hosts();
108 }
109
110 if($opt::nonall or $opt::onall) {
111 onall(@command);
112 wait_and_exit(min(undef_as_zero($Global::exitstatus),254));
113 }
114
115 # TODO --transfer foo/./bar --cleanup
116 # multiple --transfer and --basefile with different /./
117
118 $Global::JobQueue = JobQueue->new(
119 \@command,\@fhlist,$Global::ContextReplace,$number_of_args,\@Global::ret_files);
120
121 if($opt::eta or $opt::bar) {
122 # Count the number of jobs before starting any
123 $Global::JobQueue->total_jobs();
124 }
125 if($opt::pipepart) {
126 @Global::cat_partials = map { pipe_part_files($_) } @opt::a;
127 # Unget the command as many times as there are parts
128 $Global::JobQueue->{'commandlinequeue'}->unget(
129 map { $Global::JobQueue->{'commandlinequeue'}->get() } @Global::cat_partials
130 );
131 }
132 for my $sshlogin (values %Global::host) {
133 $sshlogin->max_jobs_running();
134 }
135
136 init_run_jobs();
137 my $sem;
138 if($Global::semaphore) {
139 $sem = acquire_semaphore();
140 }
141 $SIG{TERM} = \&start_no_new_jobs;
142
143 start_more_jobs();
144 if(not $opt::pipepart) {
145 if($opt::pipe) {
146 spreadstdin();
147 }
148 }
149 ::debug("init", "Start draining\n");
150 drain_job_queue();
151 ::debug("init", "Done draining\n");
152 reaper();
153 ::debug("init", "Done reaping\n");
154 if($opt::pipe and @opt::a) {
155 for my $job (@Global::tee_jobs) {
156 unlink $job->fh(2,"name");
157 $job->set_fh(2,"name","");
158 $job->print();
159 unlink $job->fh(1,"name");
160 }
161 }
162 ::debug("init", "Cleaning\n");
163 cleanup();
164 if($Global::semaphore) {
165 $sem->release();
166 }
167 for(keys %Global::sshmaster) {
168 kill "TERM", $_;
169 }
170 ::debug("init", "Halt\n");
171 if($opt::halt_on_error) {
172 wait_and_exit($Global::halt_on_error_exitstatus);
173 } else {
174 wait_and_exit(min(undef_as_zero($Global::exitstatus),254));
175 }
176
177 sub __PIPE_MODE__ {}
178
179 sub pipe_part_files {
180 # Input:
181 # $file = the file to read
182 # Returns:
183 # @commands that will cat_partial each part
184 my ($file) = @_;
185 my $buf = "";
186 my $header = find_header(\$buf,open_or_exit($file));
187 # find positions
188 my @pos = find_split_positions($file,$opt::blocksize,length $header);
189 # Make @cat_partials
190 my @cat_partials = ();
191 for(my $i=0; $i<$#pos; $i++) {
192 push @cat_partials, cat_partial($file, 0, length($header), $pos[$i], $pos[$i+1]);
193 }
194 # Remote exec should look like:
195 # ssh -oLogLevel=quiet lo 'eval `echo $SHELL | grep "/t\{0,1\}csh" > /dev/null && echo setenv PARALLEL_SEQ '$PARALLEL_SEQ'\; setenv PARALLEL_PID '$PARALLEL_PID' || echo PARALLEL_SEQ='$PARALLEL_SEQ'\;export PARALLEL_SEQ\; PARALLEL_PID='$PARALLEL_PID'\;export PARALLEL_PID` ;' tty\ \>/dev/null\ \&\&\ stty\ isig\ -onlcr\ -echo\;echo\ \$SHELL\ \|\ grep\ \"/t\\\{0,1\\\}csh\"\ \>\ /dev/null\ \&\&\ setenv\ FOO\ /tmp/foo\ \|\|\ export\ FOO=/tmp/foo\; \(wc\ -\ \$FOO\)
196 # ssh -tt not allowed. Remote will die due to broken pipe anyway.
197 # TODO test remote with --fifo / --cat
198 return @cat_partials;
199 }
200
201 sub find_header {
202 # Input:
203 # $buf_ref = reference to read-in buffer
204 # $fh = filehandle to read from
205 # Uses:
206 # $opt::header
207 # $opt::blocksize
208 # Returns:
209 # $header string
210 my ($buf_ref, $fh) = @_;
211 my $header = "";
212 if($opt::header) {
213 if($opt::header eq ":") { $opt::header = "(.*\n)"; }
214 # Number = number of lines
215 $opt::header =~ s/^(\d+)$/"(.*\n)"x$1/e;
216 while(read($fh,substr($$buf_ref,length $$buf_ref,0),$opt::blocksize)) {
217 if($$buf_ref=~s/^($opt::header)//) {
218 $header = $1;
219 last;
220 }
221 }
222 }
223 return $header;
224 }
225
226 sub find_split_positions {
227 # Input:
228 # $file = the file to read
229 # $block = (minimal) --block-size of each chunk
230 # $headerlen = length of header to be skipped
231 # Uses:
232 # $opt::recstart
233 # $opt::recend
234 # Returns:
235 # @positions of block start/end
236 my($file, $block, $headerlen) = @_;
237 my $size = -s $file;
238 $block = int $block;
239 # The optimal dd blocksize for mint, redhat, solaris, openbsd = 2^17..2^20
240 # The optimal dd blocksize for freebsd = 2^15..2^17
241 my $dd_block_size = 131072; # 2^17
242 my @pos;
243 my ($recstart,$recend) = recstartrecend();
244 my $recendrecstart = $recend.$recstart;
245 my $fh = ::open_or_exit($file);
246 push(@pos,$headerlen);
247 for(my $pos = $block+$headerlen; $pos < $size; $pos += $block) {
248 my $buf;
249 seek($fh, $pos, 0) || die;
250 while(read($fh,substr($buf,length $buf,0),$dd_block_size)) {
251 if($opt::regexp) {
252 # If match /$recend$recstart/ => Record position
253 if($buf =~ /(.*$recend)$recstart/os) {
254 my $i = length($1);
255 push(@pos,$pos+$i);
256 # Start looking for next record _after_ this match
257 $pos += $i;
258 last;
259 }
260 } else {
261 # If match $recend$recstart => Record position
262 my $i = index($buf,$recendrecstart);
263 if($i != -1) {
264 push(@pos,$pos+$i);
265 # Start looking for next record _after_ this match
266 $pos += $i;
267 last;
268 }
269 }
270 }
271 }
272 push(@pos,$size);
273 close $fh;
274 return @pos;
275 }
276
277 sub cat_partial {
278 # Input:
279 # $file = the file to read
280 # ($start, $end, [$start2, $end2, ...]) = start byte, end byte
281 # Returns:
282 # Efficient perl command to copy $start..$end, $start2..$end2, ... to stdout
283 my($file, @start_end) = @_;
284 my($start, $i);
285 # Convert start_end to start_len
286 my @start_len = map { if(++$i % 2) { $start = $_; } else { $_-$start } } @start_end;
287 return "<". shell_quote_scalar($file) .
288 q{ perl -e 'while(@ARGV) { sysseek(STDIN,shift,0) || die; $left = shift; while($read = sysread(STDIN,$buf, ($left > 32768 ? 32768 : $left))){ $left -= $read; syswrite(STDOUT,$buf); } }' } .
289 " @start_len";
290 }
291
292 sub spreadstdin {
293 # read a record
294 # Spawn a job and print the record to it.
295 # Uses:
296 # $opt::blocksize
297 # STDIN
298 # $opr::r
299 # $Global::max_lines
300 # $Global::max_number_of_args
301 # $opt::regexp
302 # $Global::start_no_new_jobs
303 # $opt::roundrobin
304 # %Global::running
305
306 my $buf = "";
307 my ($recstart,$recend) = recstartrecend();
308 my $recendrecstart = $recend.$recstart;
309 my $chunk_number = 1;
310 my $one_time_through;
311 my $blocksize = $opt::blocksize;
312 my $in = *STDIN;
313 my $header = find_header(\$buf,$in);
314 while(1) {
315 my $anything_written = 0;
316 if(not read($in,substr($buf,length $buf,0),$blocksize)) {
317 # End-of-file
318 $chunk_number != 1 and last;
319 # Force the while-loop once if everything was read by header reading
320 $one_time_through++ and last;
321 }
322 if($opt::r) {
323 # Remove empty lines
324 $buf =~ s/^\s*\n//gm;
325 if(length $buf == 0) {
326 next;
327 }
328 }
329 if($Global::max_lines and not $Global::max_number_of_args) {
330 # Read n-line records
331 my $n_lines = $buf =~ tr/\n/\n/;
332 my $last_newline_pos = rindex($buf,"\n");
333 while($n_lines % $Global::max_lines) {
334 $n_lines--;
335 $last_newline_pos = rindex($buf,"\n",$last_newline_pos-1);
336 }
337 # Chop at $last_newline_pos as that is where n-line record ends
338 $anything_written +=
339 write_record_to_pipe($chunk_number++,\$header,\$buf,
340 $recstart,$recend,$last_newline_pos+1);
341 substr($buf,0,$last_newline_pos+1) = "";
342 } elsif($opt::regexp) {
343 if($Global::max_number_of_args) {
344 # -N => (start..*?end){n}
345 # -L -N => (start..*?end){n*l}
346 my $read_n_lines = $Global::max_number_of_args * ($Global::max_lines || 1);
347 while($buf =~ s/((?:$recstart.*?$recend){$read_n_lines})($recstart.*)$/$2/os) {
348 # Copy to modifiable variable
349 my $b = $1;
350 $anything_written +=
351 write_record_to_pipe($chunk_number++,\$header,\$b,
352 $recstart,$recend,length $1);
353 }
354 } else {
355 # Find the last recend-recstart in $buf
356 if($buf =~ s/(.*$recend)($recstart.*?)$/$2/os) {
357 # Copy to modifiable variable
358 my $b = $1;
359 $anything_written +=
360 write_record_to_pipe($chunk_number++,\$header,\$b,
361 $recstart,$recend,length $1);
362 }
363 }
364 } else {
365 if($Global::max_number_of_args) {
366 # -N => (start..*?end){n}
367 my $i = 0;
368 my $read_n_lines = $Global::max_number_of_args * ($Global::max_lines || 1);
369 while(($i = nindex(\$buf,$recendrecstart,$read_n_lines)) != -1) {
370 $i += length $recend; # find the actual splitting location
371 $anything_written +=
372 write_record_to_pipe($chunk_number++,\$header,\$buf,
373 $recstart,$recend,$i);
374 substr($buf,0,$i) = "";
375 }
376 } else {
377 # Find the last recend-recstart in $buf
378 my $i = rindex($buf,$recendrecstart);
379 if($i != -1) {
380 $i += length $recend; # find the actual splitting location
381 $anything_written +=
382 write_record_to_pipe($chunk_number++,\$header,\$buf,
383 $recstart,$recend,$i);
384 substr($buf,0,$i) = "";
385 }
386 }
387 }
388 if(not $anything_written and not eof($in)) {
389 # Nothing was written - maybe the block size < record size?
390 # Increase blocksize exponentially
391 my $old_blocksize = $blocksize;
392 $blocksize = ceil($blocksize * 1.3 + 1);
393 ::warning("A record was longer than $old_blocksize. " .
394 "Increasing to --blocksize $blocksize\n");
395 }
396 }
397 ::debug("init", "Done reading input\n");
398
399 # If there is anything left in the buffer write it
400 substr($buf,0,0) = "";
401 write_record_to_pipe($chunk_number++,\$header,\$buf,$recstart,$recend,length $buf);
402
403 $Global::start_no_new_jobs ||= 1;
404 if($opt::roundrobin) {
405 for my $job (values %Global::running) {
406 close $job->fh(0,"w");
407 }
408 my %incomplete_jobs = %Global::running;
409 my $sleep = 1;
410 while(keys %incomplete_jobs) {
411 my $something_written = 0;
412 for my $pid (keys %incomplete_jobs) {
413 my $job = $incomplete_jobs{$pid};
414 if($job->stdin_buffer_length()) {
415 $something_written += $job->non_block_write();
416 } else {
417 delete $incomplete_jobs{$pid}
418 }
419 }
420 if($something_written) {
421 $sleep = $sleep/2+0.001;
422 }
423 $sleep = ::reap_usleep($sleep);
424 }
425 }
426 }
427
428 sub recstartrecend {
429 # Uses:
430 # $opt::recstart
431 # $opt::recend
432 # Returns:
433 # $recstart,$recend with default values and regexp conversion
434 my($recstart,$recend);
435 if(defined($opt::recstart) and defined($opt::recend)) {
436 # If both --recstart and --recend is given then both must match
437 $recstart = $opt::recstart;
438 $recend = $opt::recend;
439 } elsif(defined($opt::recstart)) {
440 # If --recstart is given it must match start of record
441 $recstart = $opt::recstart;
442 $recend = "";
443 } elsif(defined($opt::recend)) {
444 # If --recend is given then it must match end of record
445 $recstart = "";
446 $recend = $opt::recend;
447 }
448
449 if($opt::regexp) {
450 # If $recstart/$recend contains '|' this should only apply to the regexp
451 $recstart = "(?:".$recstart.")";
452 $recend = "(?:".$recend.")";
453 } else {
454 # $recstart/$recend = printf strings (\n)
455 $recstart =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee;
456 $recend =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee;
457 }
458 return ($recstart,$recend);
459 }
460
461 sub nindex {
462 # See if string is in buffer N times
463 # Returns:
464 # the position where the Nth copy is found
465 my ($buf_ref, $str, $n) = @_;
466 my $i = 0;
467 for(1..$n) {
468 $i = index($$buf_ref,$str,$i+1);
469 if($i == -1) { last }
470 }
471 return $i;
472 }
473
474 {
475 my @robin_queue;
476
477 sub round_robin_write {
478 # Input:
479 # $header_ref = ref to $header string
480 # $block_ref = ref to $block to be written
481 # $recstart = record start string
482 # $recend = record end string
483 # $endpos = end position of $block
484 # Uses:
485 # %Global::running
486 my ($header_ref,$block_ref,$recstart,$recend,$endpos) = @_;
487 my $something_written = 0;
488 my $block_passed = 0;
489 my $sleep = 1;
490 while(not $block_passed) {
491 # Continue flushing existing buffers
492 # until one is empty and a new block is passed
493 # Make a queue to spread the blocks evenly
494 if(not @robin_queue) {
495 push @robin_queue, values %Global::running;
496 }
497 while(my $job = shift @robin_queue) {
498 if($job->stdin_buffer_length() > 0) {
499 $something_written += $job->non_block_write();
500 } else {
501 $job->set_stdin_buffer($header_ref,$block_ref,$endpos,$recstart,$recend);
502 $block_passed = 1;
503 $job->set_virgin(0);
504 $something_written += $job->non_block_write();
505 last;
506 }
507 }
508 $sleep = ::reap_usleep($sleep);
509 }
510 return $something_written;
511 }
512 }
513
514 sub write_record_to_pipe {
515 # Fork then
516 # Write record from pos 0 .. $endpos to pipe
517 # Input:
518 # $chunk_number = sequence number - to see if already run
519 # $header_ref = reference to header string to prepend
520 # $record_ref = reference to record to write
521 # $recstart = start string of record
522 # $recend = end string of record
523 # $endpos = position in $record_ref where record ends
524 # Uses:
525 # $Global::job_already_run
526 # $opt::roundrobin
527 # @Global::virgin_jobs
528 # Returns:
529 # Number of chunks written (0 or 1)
530 my ($chunk_number,$header_ref,$record_ref,$recstart,$recend,$endpos) = @_;
531 if($endpos == 0) { return 0; }
532 if(vec($Global::job_already_run,$chunk_number,1)) { return 1; }
533 if($opt::roundrobin) {
534 return round_robin_write($header_ref,$record_ref,$recstart,$recend,$endpos);
535 }
536 # If no virgin found, backoff
537 my $sleep = 0.0001; # 0.01 ms - better performance on highend
538 while(not @Global::virgin_jobs) {
539 ::debug("pipe", "No virgin jobs");
540 $sleep = ::reap_usleep($sleep);
541 # Jobs may not be started because of loadavg
542 # or too little time between each ssh login.
543 start_more_jobs();
544 }
545 my $job = shift @Global::virgin_jobs;
546 # Job is no longer virgin
547 $job->set_virgin(0);
548 if(fork()) {
549 # Skip
550 } else {
551 # Chop of at $endpos as we do not know how many rec_sep will
552 # be removed.
553 substr($$record_ref,$endpos,length $$record_ref) = "";
554 # Remove rec_sep
555 if($opt::remove_rec_sep) {
556 Job::remove_rec_sep($record_ref,$recstart,$recend);
557 }
558 $job->write($header_ref);
559 $job->write($record_ref);
560 close $job->fh(0,"w");
561 exit(0);
562 }
563 close $job->fh(0,"w");
564 return 1;
565 }
566
567 sub __SEM_MODE__ {}
568
569 sub acquire_semaphore {
570 # Acquires semaphore. If needed: spawns to the background
571 # Uses:
572 # @Global::host
573 # Returns:
574 # The semaphore to be released when jobs is complete
575 $Global::host{':'} = SSHLogin->new(":");
576 my $sem = Semaphore->new($Semaphore::name,$Global::host{':'}->max_jobs_running());
577 $sem->acquire();
578 if($Semaphore::fg) {
579 # skip
580 } else {
581 # If run in the background, the PID will change
582 # therefore release and re-acquire the semaphore
583 $sem->release();
584 if(fork()) {
585 exit(0);
586 } else {
587 # child
588 # Get a semaphore for this pid
589 ::die_bug("Can't start a new session: $!") if setsid() == -1;
590 $sem = Semaphore->new($Semaphore::name,$Global::host{':'}->max_jobs_running());
591 $sem->acquire();
592 }
593 }
594 return $sem;
595 }
596
597 sub __PARSE_OPTIONS__ {}
598
599 sub options_hash {
600 # Returns:
601 # %hash = the GetOptions config
602 return
603 ("debug|D=s" => \$opt::D,
604 "xargs" => \$opt::xargs,
605 "m" => \$opt::m,
606 "X" => \$opt::X,
607 "v" => \@opt::v,
608 "joblog=s" => \$opt::joblog,
609 "results|result|res=s" => \$opt::results,
610 "resume" => \$opt::resume,
611 "resume-failed|resumefailed" => \$opt::resume_failed,
612 "silent" => \$opt::silent,
613 #"silent-error|silenterror" => \$opt::silent_error,
614 "keep-order|keeporder|k" => \$opt::keeporder,
615 "group" => \$opt::group,
616 "g" => \$opt::retired,
617 "ungroup|u" => \$opt::ungroup,
618 "linebuffer|linebuffered|line-buffer|line-buffered" => \$opt::linebuffer,
619 "tmux" => \$opt::tmux,
620 "null|0" => \$opt::0,
621 "quote|q" => \$opt::q,
622 # Replacement strings
623 "parens=s" => \$opt::parens,
624 "rpl=s" => \@opt::rpl,
625 "plus" => \$opt::plus,
626 "I=s" => \$opt::I,
627 "extensionreplace|er=s" => \$opt::U,
628 "U=s" => \$opt::retired,
629 "basenamereplace|bnr=s" => \$opt::basenamereplace,
630 "dirnamereplace|dnr=s" => \$opt::dirnamereplace,
631 "basenameextensionreplace|bner=s" => \$opt::basenameextensionreplace,
632 "seqreplace=s" => \$opt::seqreplace,
633 "slotreplace=s" => \$opt::slotreplace,
634 "jobs|j=s" => \$opt::jobs,
635 "delay=f" => \$opt::delay,
636 "sshdelay=f" => \$opt::sshdelay,
637 "load=s" => \$opt::load,
638 "noswap" => \$opt::noswap,
639 "max-line-length-allowed" => \$opt::max_line_length_allowed,
640 "number-of-cpus" => \$opt::number_of_cpus,
641 "number-of-cores" => \$opt::number_of_cores,
642 "use-cpus-instead-of-cores" => \$opt::use_cpus_instead_of_cores,
643 "shellquote|shell_quote|shell-quote" => \$opt::shellquote,
644 "nice=i" => \$opt::nice,
645 "timeout=s" => \$opt::timeout,
646 "tag" => \$opt::tag,
647 "tagstring|tag-string=s" => \$opt::tagstring,
648 "onall" => \$opt::onall,
649 "nonall" => \$opt::nonall,
650 "filter-hosts|filterhosts|filter-host" => \$opt::filter_hosts,
651 "sshlogin|S=s" => \@opt::sshlogin,
652 "sshloginfile|slf=s" => \@opt::sshloginfile,
653 "controlmaster|M" => \$opt::controlmaster,
654 "return=s" => \@opt::return,
655 "trc=s" => \@opt::trc,
656 "transfer" => \$opt::transfer,
657 "cleanup" => \$opt::cleanup,
658 "basefile|bf=s" => \@opt::basefile,
659 "B=s" => \$opt::retired,
660 "ctrlc|ctrl-c" => \$opt::ctrlc,
661 "noctrlc|no-ctrlc|no-ctrl-c" => \$opt::noctrlc,
662 "workdir|work-dir|wd=s" => \$opt::workdir,
663 "W=s" => \$opt::retired,
664 "tmpdir=s" => \$opt::tmpdir,
665 "tempdir=s" => \$opt::tmpdir,
666 "use-compress-program|compress-program=s" => \$opt::compress_program,
667 "use-decompress-program|decompress-program=s" => \$opt::decompress_program,
668 "compress" => \$opt::compress,
669 "tty" => \$opt::tty,
670 "T" => \$opt::retired,
671 "halt-on-error|halt=s" => \$opt::halt_on_error,
672 "H=i" => \$opt::retired,
673 "retries=i" => \$opt::retries,
674 "dry-run|dryrun" => \$opt::dryrun,
675 "progress" => \$opt::progress,
676 "eta" => \$opt::eta,
677 "bar" => \$opt::bar,
678 "arg-sep|argsep=s" => \$opt::arg_sep,
679 "arg-file-sep|argfilesep=s" => \$opt::arg_file_sep,
680 "trim=s" => \$opt::trim,
681 "env=s" => \@opt::env,
682 "recordenv|record-env" => \$opt::record_env,
683 "plain" => \$opt::plain,
684 "profile|J=s" => \@opt::profile,
685 "pipe|spreadstdin" => \$opt::pipe,
686 "robin|round-robin|roundrobin" => \$opt::roundrobin,
687 "recstart=s" => \$opt::recstart,
688 "recend=s" => \$opt::recend,
689 "regexp|regex" => \$opt::regexp,
690 "remove-rec-sep|removerecsep|rrs" => \$opt::remove_rec_sep,
691 "files|output-as-files|outputasfiles" => \$opt::files,
692 "block|block-size|blocksize=s" => \$opt::blocksize,
693 "tollef" => \$opt::retired,
694 "gnu" => \$opt::gnu,
695 "xapply" => \$opt::xapply,
696 "bibtex" => \$opt::bibtex,
697 "nn|nonotice|no-notice" => \$opt::no_notice,
698 # xargs-compatibility - implemented, man, testsuite
699 "max-procs|P=s" => \$opt::jobs,
700 "delimiter|d=s" => \$opt::d,
701 "max-chars|s=i" => \$opt::max_chars,
702 "arg-file|a=s" => \@opt::a,
703 "no-run-if-empty|r" => \$opt::r,
704 "replace|i:s" => \$opt::i,
705 "E=s" => \$opt::eof,
706 "eof|e:s" => \$opt::eof,
707 "max-args|n=i" => \$opt::max_args,
708 "max-replace-args|N=i" => \$opt::max_replace_args,
709 "colsep|col-sep|C=s" => \$opt::colsep,
710 "help|h" => \$opt::help,
711 "L=f" => \$opt::L,
712 "max-lines|l:f" => \$opt::max_lines,
713 "interactive|p" => \$opt::p,
714 "verbose|t" => \$opt::verbose,
715 "version|V" => \$opt::version,
716 "minversion|min-version=i" => \$opt::minversion,
717 "show-limits|showlimits" => \$opt::show_limits,
718 "exit|x" => \$opt::x,
719 # Semaphore
720 "semaphore" => \$opt::semaphore,
721 "semaphoretimeout=i" => \$opt::semaphoretimeout,
722 "semaphorename|id=s" => \$opt::semaphorename,
723 "fg" => \$opt::fg,
724 "bg" => \$opt::bg,
725 "wait" => \$opt::wait,
726 # Shebang #!/usr/bin/parallel --shebang
727 "shebang|hashbang" => \$opt::shebang,
728 "internal-pipe-means-argfiles" => \$opt::internal_pipe_means_argfiles,
729 "Y" => \$opt::retired,
730 "skip-first-line" => \$opt::skip_first_line,
731 "header=s" => \$opt::header,
732 "cat" => \$opt::cat,
733 "fifo" => \$opt::fifo,
734 "pipepart|pipe-part" => \$opt::pipepart,
735 "hgrp|hostgroup|hostgroups" => \$opt::hostgroups,
736 );
737 }
738
739 sub get_options_from_array {
740 # Run GetOptions on @array
741 # Input:
742 # $array_ref = ref to @ARGV to parse
743 # @keep_only = Keep only these options
744 # Uses:
745 # @ARGV
746 # Returns:
747 # true if parsing worked
748 # false if parsing failed
749 # @$array_ref is changed
750 my ($array_ref, @keep_only) = @_;
751 if(not @$array_ref) {
752 # Empty array: No need to look more at that
753 return 1;
754 }
755 # A bit of shuffling of @ARGV needed as GetOptionsFromArray is not
756 # supported everywhere
757 my @save_argv;
758 my $this_is_ARGV = (\@::ARGV == $array_ref);
759 if(not $this_is_ARGV) {
760 @save_argv = @::ARGV;
761 @::ARGV = @{$array_ref};
762 }
763 # If @keep_only set: Ignore all values except @keep_only
764 my %options = options_hash();
765 if(@keep_only) {
766 my (%keep,@dummy);
767 @keep{@keep_only} = @keep_only;
768 for my $k (grep { not $keep{$_} } keys %options) {
769 # Store the value of the option in @dummy
770 $options{$k} = \@dummy;
771 }
772 }
773 my $retval = GetOptions(%options);
774 if(not $this_is_ARGV) {
775 @{$array_ref} = @::ARGV;
776 @::ARGV = @save_argv;
777 }
778 return $retval;
779 }
780
781 sub parse_options {
782 # Returns: N/A
783 # Defaults:
784 $Global::version = 20141122;
785 $Global::progname = 'parallel';
786 $Global::infinity = 2**31;
787 $Global::debug = 0;
788 $Global::verbose = 0;
789 $Global::quoting = 0;
790 # Read only table with default --rpl values
791 %Global::replace =
792 (
793 '{}' => '',
794 '{#}' => '1 $_=$job->seq()',
795 '{%}' => '1 $_=$job->slot()',
796 '{/}' => 's:.*/::',
797 '{//}' => '$Global::use{"File::Basename"} ||= eval "use File::Basename; 1;"; $_ = dirname($_);',
798 '{/.}' => 's:.*/::; s:\.[^/.]+$::;',
799 '{.}' => 's:\.[^/.]+$::',
800 );
801 %Global::plus =
802 (
803 # {} = {+/}/{/}
804 # = {.}.{+.} = {+/}/{/.}.{+.}
805 # = {..}.{+..} = {+/}/{/..}.{+..}
806 # = {...}.{+...} = {+/}/{/...}.{+...}
807 '{+/}' => 's:/[^/]*$::',
808 '{+.}' => 's:.*\.::',
809 '{+..}' => 's:.*\.([^.]*\.):$1:',
810 '{+...}' => 's:.*\.([^.]*\.[^.]*\.):$1:',
811 '{..}' => 's:\.[^/.]+$::; s:\.[^/.]+$::',
812 '{...}' => 's:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::',
813 '{/..}' => 's:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::',
814 '{/...}' => 's:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::',
815 );
816 # Modifiable copy of %Global::replace
817 %Global::rpl = %Global::replace;
818 $Global::parens = "{==}";
819 $/="\n";
820 $Global::ignore_empty = 0;
821 $Global::interactive = 0;
822 $Global::stderr_verbose = 0;
823 $Global::default_simultaneous_sshlogins = 9;
824 $Global::exitstatus = 0;
825 $Global::halt_on_error_exitstatus = 0;
826 $Global::arg_sep = ":::";
827 $Global::arg_file_sep = "::::";
828 $Global::trim = 'n';
829 $Global::max_jobs_running = 0;
830 $Global::job_already_run = '';
831 $ENV{'TMPDIR'} ||= "/tmp";
832
833 @ARGV=read_options();
834
835 if(@opt::v) { $Global::verbose = $#opt::v+1; } # Convert -v -v to v=2
836 $Global::debug = $opt::D;
837 $Global::shell = $ENV{'PARALLEL_SHELL'} || parent_shell($$) || $ENV{'SHELL'} || "/bin/sh";
838 if(defined $opt::X) { $Global::ContextReplace = 1; }
839 if(defined $opt::silent) { $Global::verbose = 0; }
840 if(defined $opt::0) { $/ = "\0"; }
841 if(defined $opt::d) { my $e="sprintf \"$opt::d\""; $/ = eval $e; }
842 if(defined $opt::p) { $Global::interactive = $opt::p; }
843 if(defined $opt::q) { $Global::quoting = 1; }
844 if(defined $opt::r) { $Global::ignore_empty = 1; }
845 if(defined $opt::verbose) { $Global::stderr_verbose = 1; }
846 # Deal with --rpl
847 sub rpl {
848 # Modify %Global::rpl
849 # Replace $old with $new
850 my ($old,$new) = @_;
851 if($old ne $new) {
852 $Global::rpl{$new} = $Global::rpl{$old};
853 delete $Global::rpl{$old};
854 }
855 }
856 if(defined $opt::parens) { $Global::parens = $opt::parens; }
857 my $parenslen = 0.5*length $Global::parens;
858 $Global::parensleft = substr($Global::parens,0,$parenslen);
859 $Global::parensright = substr($Global::parens,$parenslen);
860 if(defined $opt::plus) { %Global::rpl = (%Global::plus,%Global::rpl); }
861 if(defined $opt::I) { rpl('{}',$opt::I); }
862 if(defined $opt::U) { rpl('{.}',$opt::U); }
863 if(defined $opt::i and $opt::i) { rpl('{}',$opt::i); }
864 if(defined $opt::basenamereplace) { rpl('{/}',$opt::basenamereplace); }
865 if(defined $opt::dirnamereplace) { rpl('{//}',$opt::dirnamereplace); }
866 if(defined $opt::seqreplace) { rpl('{#}',$opt::seqreplace); }
867 if(defined $opt::slotreplace) { rpl('{%}',$opt::slotreplace); }
868 if(defined $opt::basenameextensionreplace) {
869 rpl('{/.}',$opt::basenameextensionreplace);
870 }
871 for(@opt::rpl) {
872 # Create $Global::rpl entries for --rpl options
873 # E.g: "{..} s:\.[^.]+$:;s:\.[^.]+$:;"
874 my ($shorthand,$long) = split/ /,$_,2;
875 $Global::rpl{$shorthand} = $long;
876 }
877 if(defined $opt::eof) { $Global::end_of_file_string = $opt::eof; }
878 if(defined $opt::max_args) { $Global::max_number_of_args = $opt::max_args; }
879 if(defined $opt::timeout) { $Global::timeoutq = TimeoutQueue->new($opt::timeout); }
880 if(defined $opt::tmpdir) { $ENV{'TMPDIR'} = $opt::tmpdir; }
881 if(defined $opt::help) { die_usage(); }
882 if(defined $opt::colsep) { $Global::trim = 'lr'; }
883 if(defined $opt::header) { $opt::colsep = defined $opt::colsep ? $opt::colsep : "\t"; }
884 if(defined $opt::trim) { $Global::trim = $opt::trim; }
885 if(defined $opt::arg_sep) { $Global::arg_sep = $opt::arg_sep; }
886 if(defined $opt::arg_file_sep) { $Global::arg_file_sep = $opt::arg_file_sep; }
887 if(defined $opt::number_of_cpus) { print SSHLogin::no_of_cpus(),"\n"; wait_and_exit(0); }
888 if(defined $opt::number_of_cores) {
889 print SSHLogin::no_of_cores(),"\n"; wait_and_exit(0);
890 }
891 if(defined $opt::max_line_length_allowed) {
892 print Limits::Command::real_max_length(),"\n"; wait_and_exit(0);
893 }
894 if(defined $opt::version) { version(); wait_and_exit(0); }
895 if(defined $opt::bibtex) { bibtex(); wait_and_exit(0); }
896 if(defined $opt::record_env) { record_env(); wait_and_exit(0); }
897 if(defined $opt::show_limits) { show_limits(); }
898 if(@opt::sshlogin) { @Global::sshlogin = @opt::sshlogin; }
899 if(@opt::sshloginfile) { read_sshloginfiles(@opt::sshloginfile); }
900 if(@opt::return) { push @Global::ret_files, @opt::return; }
901 if(not defined $opt::recstart and
902 not defined $opt::recend) { $opt::recend = "\n"; }
903 if(not defined $opt::blocksize) { $opt::blocksize = "1M"; }
904 $opt::blocksize = multiply_binary_prefix($opt::blocksize);
905 if(defined $opt::controlmaster) { $opt::noctrlc = 1; }
906 if(defined $opt::semaphore) { $Global::semaphore = 1; }
907 if(defined $opt::semaphoretimeout) { $Global::semaphore = 1; }
908 if(defined $opt::semaphorename) { $Global::semaphore = 1; }
909 if(defined $opt::fg) { $Global::semaphore = 1; }
910 if(defined $opt::bg) { $Global::semaphore = 1; }
911 if(defined $opt::wait) { $Global::semaphore = 1; }
912 if(defined $opt::halt_on_error and
913 $opt::halt_on_error=~/%/) { $opt::halt_on_error /= 100; }
914 if(defined $opt::timeout and $opt::timeout !~ /^\d+(\.\d+)?%?$/) {
915 ::error("--timeout must be seconds or percentage\n");
916 wait_and_exit(255);
917 }
918 if(defined $opt::minversion) {
919 print $Global::version,"\n";
920 if($Global::version < $opt::minversion) {
921 wait_and_exit(255);
922 } else {
923 wait_and_exit(0);
924 }
925 }
926 if(not defined $opt::delay) {
927 # Set --delay to --sshdelay if not set
928 $opt::delay = $opt::sshdelay;
929 }
930 if($opt::compress_program) {
931 $opt::compress = 1;
932 $opt::decompress_program ||= $opt::compress_program." -dc";
933 }
934 if($opt::compress) {
935 my ($compress, $decompress) = find_compression_program();
936 $opt::compress_program ||= $compress;
937 $opt::decompress_program ||= $decompress;
938 }
939 if(defined $opt::nonall) {
940 # Append a dummy empty argument
941 push @ARGV, $Global::arg_sep, "";
942 }
943 if(defined $opt::tty) {
944 # Defaults for --tty: -j1 -u
945 # Can be overridden with -jXXX -g
946 if(not defined $opt::jobs) {
947 $opt::jobs = 1;
948 }
949 if(not defined $opt::group) {
950 $opt::ungroup = 0;
951 }
952 }
953 if(@opt::trc) {
954 push @Global::ret_files, @opt::trc;
955 $opt::transfer = 1;
956 $opt::cleanup = 1;
957 }
958 if(defined $opt::max_lines) {
959 if($opt::max_lines eq "-0") {
960 # -l -0 (swallowed -0)
961 $opt::max_lines = 1;
962 $opt::0 = 1;
963 $/ = "\0";
964 } elsif ($opt::max_lines == 0) {
965 # If not given (or if 0 is given) => 1
966 $opt::max_lines = 1;
967 }
968 $Global::max_lines = $opt::max_lines;
969 if(not $opt::pipe) {
970 # --pipe -L means length of record - not max_number_of_args
971 $Global::max_number_of_args ||= $Global::max_lines;
972 }
973 }
974
975 # Read more than one arg at a time (-L, -N)
976 if(defined $opt::L) {
977 $Global::max_lines = $opt::L;
978 if(not $opt::pipe) {
979 # --pipe -L means length of record - not max_number_of_args
980 $Global::max_number_of_args ||= $Global::max_lines;
981 }
982 }
983 if(defined $opt::max_replace_args) {
984 $Global::max_number_of_args = $opt::max_replace_args;
985 $Global::ContextReplace = 1;
986 }
987 if((defined $opt::L or defined $opt::max_replace_args)
988 and
989 not ($opt::xargs or $opt::m)) {
990 $Global::ContextReplace = 1;
991 }
992 if(defined $opt::tag and not defined $opt::tagstring) {
993 $opt::tagstring = "\257<\257>"; # Default = {}
994 }
995 if(defined $opt::pipepart and
996 (defined $opt::L or defined $opt::max_lines
997 or defined $opt::max_replace_args)) {
998 ::error("--pipepart is incompatible with --max-replace-args, ",
999 "--max-lines, and -L.\n");
1000 wait_and_exit(255);
1001 }
1002 if(grep /^$Global::arg_sep$|^$Global::arg_file_sep$/o, @ARGV) {
1003 # Deal with ::: and ::::
1004 @ARGV=read_args_from_command_line();
1005 }
1006
1007 # Semaphore defaults
1008 # Must be done before computing number of processes and max_line_length
1009 # because when running as a semaphore GNU Parallel does not read args
1010 $Global::semaphore ||= ($0 =~ m:(^|/)sem$:); # called as 'sem'
1011 if($Global::semaphore) {
1012 # A semaphore does not take input from neither stdin nor file
1013 @opt::a = ("/dev/null");
1014 push(@Global::unget_argv, [Arg->new("")]);
1015 $Semaphore::timeout = $opt::semaphoretimeout || 0;
1016 if(defined $opt::semaphorename) {
1017 $Semaphore::name = $opt::semaphorename;
1018 } else {
1019 $Semaphore::name = `tty`;
1020 chomp $Semaphore::name;
1021 }
1022 $Semaphore::fg = $opt::fg;
1023 $Semaphore::wait = $opt::wait;
1024 $Global::default_simultaneous_sshlogins = 1;
1025 if(not defined $opt::jobs) {
1026 $opt::jobs = 1;
1027 }
1028 if($Global::interactive and $opt::bg) {
1029 ::error("Jobs running in the ".
1030 "background cannot be interactive.\n");
1031 ::wait_and_exit(255);
1032 }
1033 }
1034 if(defined $opt::eta) {
1035 $opt::progress = $opt::eta;
1036 }
1037 if(defined $opt::bar) {
1038 $opt::progress = $opt::bar;
1039 }
1040 if(defined $opt::retired) {
1041 ::error("-g has been retired. Use --group.\n");
1042 ::error("-B has been retired. Use --bf.\n");
1043 ::error("-T has been retired. Use --tty.\n");
1044 ::error("-U has been retired. Use --er.\n");
1045 ::error("-W has been retired. Use --wd.\n");
1046 ::error("-Y has been retired. Use --shebang.\n");
1047 ::error("-H has been retired. Use --halt.\n");
1048 ::error("--tollef has been retired. Use -u -q --arg-sep -- and --load for -l.\n");
1049 ::wait_and_exit(255);
1050 }
1051 citation_notice();
1052
1053 parse_sshlogin();
1054 parse_env_var();
1055
1056 if(remote_hosts() and ($opt::X or $opt::m or $opt::xargs)) {
1057 # As we do not know the max line length on the remote machine
1058 # long commands generated by xargs may fail
1059 # If opt_N is set, it is probably safe
1060 ::warning("Using -X or -m with --sshlogin may fail.\n");
1061 }
1062
1063 if(not defined $opt::jobs) {
1064 $opt::jobs = "100%";
1065 }
1066 open_joblog();
1067 }
1068
1069 sub env_quote {
1070 # Input:
1071 # $v = value to quote
1072 # Returns:
1073 # $v = value quoted as environment variable
1074 my $v = $_[0];
1075 $v =~ s/([\\])/\\$1/g;
1076 $v =~ s/([\[\] \#\'\&\<\>\(\)\;\{\}\t\"\$\`\*\174\!\?\~])/\\$1/g;
1077 $v =~ s/\n/"\n"/g;
1078 return $v;
1079 }
1080
1081 sub record_env {
1082 # Record current %ENV-keys in ~/.parallel/ignored_vars
1083 # Returns: N/A
1084 my $ignore_filename = $ENV{'HOME'} . "/.parallel/ignored_vars";
1085 if(open(my $vars_fh, ">", $ignore_filename)) {
1086 print $vars_fh map { $_,"\n" } keys %ENV;
1087 } else {
1088 ::error("Cannot write to $ignore_filename\n");
1089 ::wait_and_exit(255);
1090 }
1091 }
1092
1093 sub parse_env_var {
1094 # Parse --env and set $Global::envvar, $Global::envwarn and $Global::envvarlen
1095 #
1096 # Bash functions must be parsed to export them remotely
1097 # Pre-shellshock style bash function:
1098 # myfunc=() {...
1099 # Post-shellshock style bash function:
1100 # BASH_FUNC_myfunc()=() {...
1101 #
1102 # Uses:
1103 # $Global::envvar = eval string that will set variables in both bash and csh
1104 # $Global::envwarn = If functions are used: Give warning in csh
1105 # $Global::envvarlen = length of $Global::envvar
1106 # @opt::env
1107 # $Global::shell
1108 # %ENV
1109 # Returns: N/A
1110 $Global::envvar = "";
1111 $Global::envwarn = "";
1112 my @vars = ('parallel_bash_environment');
1113 for my $varstring (@opt::env) {
1114 # Split up --env VAR1,VAR2
1115 push @vars, split /,/, $varstring;
1116 }
1117 if(grep { /^_$/ } @vars) {
1118 # --env _
1119 # Include all vars that are not in a clean environment
1120 if(open(my $vars_fh, "<", $ENV{'HOME'} . "/.parallel/ignored_vars")) {
1121 my @ignore = <$vars_fh>;
1122 chomp @ignore;
1123 my %ignore;
1124 @ignore{@ignore} = @ignore;
1125 close $vars_fh;
1126 push @vars, grep { not defined $ignore{$_} } keys %ENV;
1127 @vars = grep { not /^_$/ } @vars;
1128 } else {
1129 ::error("Run '$Global::progname --record-env' in a clean environment first.\n");
1130 ::wait_and_exit(255);
1131 }
1132 }
1133 # Duplicate vars as BASH functions to include post-shellshock functions.
1134 # So --env myfunc should also look for BASH_FUNC_myfunc()
1135 @vars = map { $_, "BASH_FUNC_$_()" } @vars;
1136 # Keep only defined variables
1137 @vars = grep { defined($ENV{$_}) } @vars;
1138 # Pre-shellshock style bash function:
1139 # myfunc=() { echo myfunc
1140 # }
1141 # Post-shellshock style bash function:
1142 # BASH_FUNC_myfunc()=() { echo myfunc
1143 # }
1144 my @bash_functions = grep { substr($ENV{$_},0,4) eq "() {" } @vars;
1145 my @non_functions = grep { substr($ENV{$_},0,4) ne "() {" } @vars;
1146 if(@bash_functions) {
1147 # Functions are not supported for all shells
1148 if($Global::shell !~ m:/(bash|rbash|zsh|rzsh|dash|ksh):) {
1149 ::warning("Shell functions may not be supported in $Global::shell\n");
1150 }
1151 }
1152
1153 # Pre-shellschock names are without ()
1154 my @bash_pre_shellshock = grep { not /\(\)/ } @bash_functions;
1155 # Post-shellschock names are with ()
1156 my @bash_post_shellshock = grep { /\(\)/ } @bash_functions;
1157
1158 my @qcsh = (map { my $a=$_; "setenv $a " . env_quote($ENV{$a}) }
1159 grep { not /^parallel_bash_environment$/ } @non_functions);
1160 my @qbash = (map { my $a=$_; "export $a=" . env_quote($ENV{$a}) }
1161 @non_functions, @bash_pre_shellshock);
1162
1163 push @qbash, map { my $a=$_; "eval $a\"\$$a\"" } @bash_pre_shellshock;
1164 push @qbash, map { /BASH_FUNC_(.*)\(\)/; "$1 $ENV{$_}" } @bash_post_shellshock;
1165
1166 #ssh -tt -oLogLevel=quiet lo 'eval `echo PARALLEL_SEQ='$PARALLEL_SEQ'\;export PARALLEL_SEQ\; PARALLEL_PID='$PARALLEL_PID'\;export PARALLEL_PID` ;' tty\ \>/dev/null\ \&\&\ stty\ isig\ -onlcr\ -echo\;echo\ \$SHELL\ \|\ grep\ \"/t\\\{0,1\\\}csh\"\ \>\ /dev/null\ \&\&\ setenv\ BASH_FUNC_myfunc\ \\\(\\\)\\\ \\\{\\\ \\\ echo\\\ a\"'
1167 #'\"\\\}\ \|\|\ myfunc\(\)\ \{\ \ echo\ a'
1168 #'\}\ \;myfunc\ 1;
1169
1170 # Check if any variables contain \n
1171 if(my @v = map { s/BASH_FUNC_(.*)\(\)/$1/; $_ } grep { $ENV{$_}=~/\n/ } @vars) {
1172 # \n is bad for csh and will cause it to fail.
1173 $Global::envwarn = ::shell_quote_scalar(q{echo $SHELL | egrep "/t?csh" > /dev/null && echo CSH/TCSH DO NOT SUPPORT newlines IN VARIABLES/FUNCTIONS. Unset }."@v".q{ && exec false;}."\n\n") . $Global::envwarn;
1174 }
1175
1176 if(not @qcsh) { push @qcsh, "true"; }
1177 if(not @qbash) { push @qbash, "true"; }
1178 # Create lines like:
1179 # echo $SHELL | grep "/t\\{0,1\\}csh" >/dev/null && setenv V1 val1 && setenv V2 val2 || export V1=val1 && export V2=val2 ; echo "$V1$V2"
1180 if(@vars) {
1181 $Global::envvar .=
1182 join"",
1183 (q{echo $SHELL | grep "/t\\{0,1\\}csh" > /dev/null && }
1184 . join(" && ", @qcsh)
1185 . q{ || }
1186 . join(" && ", @qbash)
1187 .q{;});
1188 if($ENV{'parallel_bash_environment'}) {
1189 $Global::envvar .= 'eval "$parallel_bash_environment";'."\n";
1190 }
1191 }
1192 $Global::envvarlen = length $Global::envvar;
1193 }
1194
1195 sub open_joblog {
1196 # Open joblog as specified by --joblog
1197 # Uses:
1198 # $opt::resume
1199 # $opt::resume_failed
1200 # $opt::joblog
1201 # $opt::results
1202 # $Global::job_already_run
1203 # %Global::fd
1204 my $append = 0;
1205 if(($opt::resume or $opt::resume_failed)
1206 and
1207 not ($opt::joblog or $opt::results)) {
1208 ::error("--resume and --resume-failed require --joblog or --results.\n");
1209 ::wait_and_exit(255);
1210 }
1211 if($opt::joblog) {
1212 if($opt::resume || $opt::resume_failed) {
1213 if(open(my $joblog_fh, "<", $opt::joblog)) {
1214 # Read the joblog
1215 $append = <$joblog_fh>; # If there is a header: Open as append later
1216 my $joblog_regexp;
1217 if($opt::resume_failed) {
1218 # Make a regexp that only matches commands with exit+signal=0
1219 # 4 host 1360490623.067 3.445 1023 1222 0 0 command
1220 $joblog_regexp='^(\d+)(?:\t[^\t]+){5}\t0\t0\t';
1221 } else {
1222 # Just match the job number
1223 $joblog_regexp='^(\d+)';
1224 }
1225 while(<$joblog_fh>) {
1226 if(/$joblog_regexp/o) {
1227 # This is 30% faster than set_job_already_run($1);
1228 vec($Global::job_already_run,($1||0),1) = 1;
1229 } elsif(not /\d+\s+[^\s]+\s+([0-9.]+\s+){6}/) {
1230 ::error("Format of '$opt::joblog' is wrong: $_");
1231 ::wait_and_exit(255);
1232 }
1233 }
1234 close $joblog_fh;
1235 }
1236 }
1237 if($append) {
1238 # Append to joblog
1239 if(not open($Global::joblog, ">>", $opt::joblog)) {
1240 ::error("Cannot append to --joblog $opt::joblog.\n");
1241 ::wait_and_exit(255);
1242 }
1243 } else {
1244 if($opt::joblog eq "-") {
1245 # Use STDOUT as joblog
1246 $Global::joblog = $Global::fd{1};
1247 } elsif(not open($Global::joblog, ">", $opt::joblog)) {
1248 # Overwrite the joblog
1249 ::error("Cannot write to --joblog $opt::joblog.\n");
1250 ::wait_and_exit(255);
1251 }
1252 print $Global::joblog
1253 join("\t", "Seq", "Host", "Starttime", "JobRuntime",
1254 "Send", "Receive", "Exitval", "Signal", "Command"
1255 ). "\n";
1256 }
1257 }
1258 }
1259
1260 sub find_compression_program {
1261 # Find a fast compression program
1262 # Returns:
1263 # $compress_program = compress program with options
1264 # $decompress_program = decompress program with options
1265
1266 # Search for these. Sorted by speed
1267 my @prg = qw(lzop pigz pxz gzip plzip pbzip2 lzma xz lzip bzip2);
1268 for my $p (@prg) {
1269 if(which($p)) {
1270 return ("$p -c -1","$p -dc");
1271 }
1272 }
1273 # Fall back to cat
1274 return ("cat","cat");
1275 }
1276
1277
1278 sub read_options {
1279 # Read options from command line, profile and $PARALLEL
1280 # Uses:
1281 # $opt::shebang_wrap
1282 # $opt::shebang
1283 # @ARGV
1284 # $opt::plain
1285 # @opt::profile
1286 # $ENV{'HOME'}
1287 # $ENV{'PARALLEL'}
1288 # Returns:
1289 # @ARGV_no_opt = @ARGV without --options
1290
1291 # This must be done first as this may exec myself
1292 if(defined $ARGV[0] and ($ARGV[0] =~ /^--shebang/ or
1293 $ARGV[0] =~ /^--shebang-?wrap/ or
1294 $ARGV[0] =~ /^--hashbang/)) {
1295 # Program is called from #! line in script
1296 # remove --shebang-wrap if it is set
1297 $opt::shebang_wrap = ($ARGV[0] =~ s/^--shebang-?wrap *//);
1298 # remove --shebang if it is set
1299 $opt::shebang = ($ARGV[0] =~ s/^--shebang *//);
1300 # remove --hashbang if it is set
1301 $opt::shebang .= ($ARGV[0] =~ s/^--hashbang *//);
1302 if($opt::shebang) {
1303 my $argfile = shell_quote_scalar(pop @ARGV);
1304 # exec myself to split $ARGV[0] into separate fields
1305 exec "$0 --skip-first-line -a $argfile @ARGV";
1306 }
1307 if($opt::shebang_wrap) {
1308 my @options;
1309 my @parser;
1310 if ($^O eq 'freebsd') {
1311 # FreeBSD's #! puts different values in @ARGV than Linux' does.
1312 my @nooptions = @ARGV;
1313 get_options_from_array(\@nooptions);
1314 while($#ARGV > $#nooptions) {
1315 push @options, shift @ARGV;
1316 }
1317 while(@ARGV and $ARGV[0] ne ":::") {
1318 push @parser, shift @ARGV;
1319 }
1320 if(@ARGV and $ARGV[0] eq ":::") {
1321 shift @ARGV;
1322 }
1323 } else {
1324 @options = shift @ARGV;
1325 }
1326 my $script = shell_quote_scalar(shift @ARGV);
1327 # exec myself to split $ARGV[0] into separate fields
1328 exec "$0 --internal-pipe-means-argfiles @options @parser $script ::: @ARGV";
1329 }
1330 }
1331
1332 Getopt::Long::Configure("bundling","require_order");
1333 my @ARGV_copy = @ARGV;
1334 # Check if there is a --profile to set @opt::profile
1335 get_options_from_array(\@ARGV_copy,"profile|J=s","plain") || die_usage();
1336 my @ARGV_profile = ();
1337 my @ARGV_env = ();
1338 if(not $opt::plain) {
1339 # Add options from .parallel/config and other profiles
1340 my @config_profiles = (
1341 "/etc/parallel/config",
1342 $ENV{'HOME'}."/.parallel/config",
1343 $ENV{'HOME'}."/.parallelrc");
1344 my @profiles = @config_profiles;
1345 if(@opt::profile) {
1346 # --profile overrides default profiles
1347 @profiles = ();
1348 for my $profile (@opt::profile) {
1349 if(-r $profile) {
1350 push @profiles, $profile;
1351 } else {
1352 push @profiles, $ENV{'HOME'}."/.parallel/".$profile;
1353 }
1354 }
1355 }
1356 for my $profile (@profiles) {
1357 if(-r $profile) {
1358 open (my $in_fh, "<", $profile) || ::die_bug("read-profile: $profile");
1359 while(<$in_fh>) {
1360 /^\s*\#/ and next;
1361 chomp;
1362 push @ARGV_profile, shellwords($_);
1363 }
1364 close $in_fh;
1365 } else {
1366 if(grep /^$profile$/, @config_profiles) {
1367 # config file is not required to exist
1368 } else {
1369 ::error("$profile not readable.\n");
1370 wait_and_exit(255);
1371 }
1372 }
1373 }
1374 # Add options from shell variable $PARALLEL
1375 if($ENV{'PARALLEL'}) {
1376 @ARGV_env = shellwords($ENV{'PARALLEL'});
1377 }
1378 }
1379 Getopt::Long::Configure("bundling","require_order");
1380 get_options_from_array(\@ARGV_profile) || die_usage();
1381 get_options_from_array(\@ARGV_env) || die_usage();
1382 get_options_from_array(\@ARGV) || die_usage();
1383
1384 # Prepend non-options to @ARGV (such as commands like 'nice')
1385 unshift @ARGV, @ARGV_profile, @ARGV_env;
1386 return @ARGV;
1387 }
1388
1389 sub read_args_from_command_line {
1390 # Arguments given on the command line after:
1391 # ::: ($Global::arg_sep)
1392 # :::: ($Global::arg_file_sep)
1393 # Removes the arguments from @ARGV and:
1394 # - puts filenames into -a
1395 # - puts arguments into files and add the files to -a
1396 # Input:
1397 # @::ARGV = command option ::: arg arg arg :::: argfiles
1398 # Uses:
1399 # $Global::arg_sep
1400 # $Global::arg_file_sep
1401 # $opt::internal_pipe_means_argfiles
1402 # $opt::pipe
1403 # @opt::a
1404 # Returns:
1405 # @argv_no_argsep = @::ARGV without ::: and :::: and following args
1406 my @new_argv = ();
1407 for(my $arg = shift @ARGV; @ARGV; $arg = shift @ARGV) {
1408 if($arg eq $Global::arg_sep
1409 or
1410 $arg eq $Global::arg_file_sep) {
1411 my $group = $arg; # This group of arguments is args or argfiles
1412 my @group;
1413 while(defined ($arg = shift @ARGV)) {
1414 if($arg eq $Global::arg_sep
1415 or
1416 $arg eq $Global::arg_file_sep) {
1417 # exit while loop if finding new separator
1418 last;
1419 } else {
1420 # If not hitting ::: or ::::
1421 # Append it to the group
1422 push @group, $arg;
1423 }
1424 }
1425
1426 if($group eq $Global::arg_file_sep
1427 or ($opt::internal_pipe_means_argfiles and $opt::pipe)
1428 ) {
1429 # Group of file names on the command line.
1430 # Append args into -a
1431 push @opt::a, @group;
1432 } elsif($group eq $Global::arg_sep) {
1433 # Group of arguments on the command line.
1434 # Put them into a file.
1435 # Create argfile
1436 my ($outfh,$name) = ::tmpfile(SUFFIX => ".arg");
1437 unlink($name);
1438 # Put args into argfile
1439 print $outfh map { $_,$/ } @group;
1440 seek $outfh, 0, 0;
1441 # Append filehandle to -a
1442 push @opt::a, $outfh;
1443 } else {
1444 ::die_bug("Unknown command line group: $group");
1445 }
1446 if(defined($arg)) {
1447 # $arg is ::: or ::::
1448 redo;
1449 } else {
1450 # $arg is undef -> @ARGV empty
1451 last;
1452 }
1453 }
1454 push @new_argv, $arg;
1455 }
1456 # Output: @ARGV = command to run with options
1457 return @new_argv;
1458 }
1459
1460 sub cleanup {
1461 # Returns: N/A
1462 if(@opt::basefile) { cleanup_basefile(); }
1463 }
1464
1465 sub __QUOTING_ARGUMENTS_FOR_SHELL__ {}
1466
1467 sub shell_quote {
1468 # Input:
1469 # @strings = strings to be quoted
1470 # Output:
1471 # @shell_quoted_strings = string quoted with \ as needed by the shell
1472 my @strings = (@_);
1473 for my $a (@strings) {
1474 $a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\*\>\<\~\|\; \"\!\$\&\'\202-\377])/\\$1/g;
1475 $a =~ s/[\n]/'\n'/g; # filenames with '\n' is quoted using \'
1476 }
1477 return wantarray ? @strings : "@strings";
1478 }
1479
1480 sub shell_quote_empty {
1481 # Inputs:
1482 # @strings = strings to be quoted
1483 # Returns:
1484 # @quoted_strings = empty strings quoted as ''.
1485 my @strings = shell_quote(@_);
1486 for my $a (@strings) {
1487 if($a eq "") {
1488 $a = "''";
1489 }
1490 }
1491 return wantarray ? @strings : "@strings";
1492 }
1493
1494 sub shell_quote_scalar {
1495 # Quote the string so shell will not expand any special chars
1496 # Inputs:
1497 # $string = string to be quoted
1498 # Returns:
1499 # $shell_quoted = string quoted with \ as needed by the shell
1500 my $a = $_[0];
1501 if(defined $a) {
1502 # $a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\*\>\<\~\|\; \"\!\$\&\'\202-\377])/\\$1/g;
1503 # This is 1% faster than the above
1504 $a =~ s/[\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\*\>\<\~\|\; \"\!\$\&\'\202-\377]/\\$&/go;
1505 $a =~ s/[\n]/'\n'/go; # filenames with '\n' is quoted using \'
1506 }
1507 return $a;
1508 }
1509
1510 sub shell_quote_file {
1511 # Quote the string so shell will not expand any special chars and prepend ./ if needed
1512 # Input:
1513 # $filename = filename to be shell quoted
1514 # Returns:
1515 # $quoted_filename = filename quoted with \ as needed by the shell and ./ if needed
1516 my $a = shell_quote_scalar(shift);
1517 if(defined $a) {
1518 if($a =~ m:^/: or $a =~ m:^\./:) {
1519 # /abs/path or ./rel/path => skip
1520 } else {
1521 # rel/path => ./rel/path
1522 $a = "./".$a;
1523 }
1524 }
1525 return $a;
1526 }
1527
1528 sub shellwords {
1529 # Input:
1530 # $string = shell line
1531 # Returns:
1532 # @shell_words = $string split into words as shell would do
1533 $Global::use{"Text::ParseWords"} ||= eval "use Text::ParseWords; 1;";
1534 return Text::ParseWords::shellwords(@_);
1535 }
1536
1537
1538 sub __FILEHANDLES__ {}
1539
1540
1541 sub save_stdin_stdout_stderr {
1542 # Remember the original STDIN, STDOUT and STDERR
1543 # and file descriptors opened by the shell (e.g. 3>/tmp/foo)
1544 # Uses:
1545 # %Global::fd
1546 # $Global::original_stderr
1547 # $Global::original_stdin
1548 # Returns: N/A
1549
1550 # Find file descriptors that are already opened (by the shell)
1551 for my $fdno (1..61) {
1552 # /dev/fd/62 and above are used by bash for <(cmd)
1553 my $fh;
1554 # 2-argument-open is used to be compatible with old perl 5.8.0
1555 # bug #43570: Perl 5.8.0 creates 61 files
1556 if(open($fh,">&=$fdno")) {
1557 $Global::fd{$fdno}=$fh;
1558 }
1559 }
1560 open $Global::original_stderr, ">&", "STDERR" or
1561 ::die_bug("Can't dup STDERR: $!");
1562 open $Global::original_stdin, "<&", "STDIN" or
1563 ::die_bug("Can't dup STDIN: $!");
1564 }
1565
1566 sub enough_file_handles {
1567 # Check that we have enough filehandles available for starting
1568 # another job
1569 # Uses:
1570 # $opt::ungroup
1571 # %Global::fd
1572 # Returns:
1573 # 1 if ungrouped (thus not needing extra filehandles)
1574 # 0 if too few filehandles
1575 # 1 if enough filehandles
1576 if(not $opt::ungroup) {
1577 my %fh;
1578 my $enough_filehandles = 1;
1579 # perl uses 7 filehandles for something?
1580 # open3 uses 2 extra filehandles temporarily
1581 # We need a filehandle for each redirected file descriptor
1582 # (normally just STDOUT and STDERR)
1583 for my $i (1..(7+2+keys %Global::fd)) {
1584 $enough_filehandles &&= open($fh{$i}, "<", "/dev/null");
1585 }
1586 for (values %fh) { close $_; }
1587 return $enough_filehandles;
1588 } else {
1589 # Ungrouped does not need extra file handles
1590 return 1;
1591 }
1592 }
1593
1594 sub open_or_exit {
1595 # Open a file name or exit if the file cannot be opened
1596 # Inputs:
1597 # $file = filehandle or filename to open
1598 # Uses:
1599 # $Global::stdin_in_opt_a
1600 # $Global::original_stdin
1601 # Returns:
1602 # $fh = file handle to read-opened file
1603 my $file = shift;
1604 if($file eq "-") {
1605 $Global::stdin_in_opt_a = 1;
1606 return ($Global::original_stdin || *STDIN);
1607 }
1608 if(ref $file eq "GLOB") {
1609 # This is an open filehandle
1610 return $file;
1611 }
1612 my $fh = gensym;
1613 if(not open($fh, "<", $file)) {
1614 ::error("Cannot open input file `$file': No such file or directory.\n");
1615 wait_and_exit(255);
1616 }
1617 return $fh;
1618 }
1619
1620 sub __RUNNING_THE_JOBS_AND_PRINTING_PROGRESS__ {}
1621
1622 # Variable structure:
1623 #
1624 # $Global::running{$pid} = Pointer to Job-object
1625 # @Global::virgin_jobs = Pointer to Job-object that have received no input
1626 # $Global::host{$sshlogin} = Pointer to SSHLogin-object
1627 # $Global::total_running = total number of running jobs
1628 # $Global::total_started = total jobs started
1629
1630 sub init_run_jobs {
1631 $Global::total_running = 0;
1632 $Global::total_started = 0;
1633 $Global::tty_taken = 0;
1634 $SIG{USR1} = \&list_running_jobs;
1635 $SIG{USR2} = \&toggle_progress;
1636 if(@opt::basefile) { setup_basefile(); }
1637 }
1638
1639 {
1640 my $last_time;
1641 my %last_mtime;
1642
1643 sub start_more_jobs {
1644 # Run start_another_job() but only if:
1645 # * not $Global::start_no_new_jobs set
1646 # * not JobQueue is empty
1647 # * not load on server is too high
1648 # * not server swapping
1649 # * not too short time since last remote login
1650 # Uses:
1651 # $Global::max_procs_file
1652 # $Global::max_procs_file_last_mod
1653 # %Global::host
1654 # @opt::sshloginfile
1655 # $Global::start_no_new_jobs
1656 # $opt::filter_hosts
1657 # $Global::JobQueue
1658 # $opt::pipe
1659 # $opt::load
1660 # $opt::noswap
1661 # $opt::delay
1662 # $Global::newest_starttime
1663 # Returns:
1664 # $jobs_started = number of jobs started
1665 my $jobs_started = 0;
1666 my $jobs_started_this_round = 0;
1667 if($Global::start_no_new_jobs) {
1668 return $jobs_started;
1669 }
1670 if(time - ($last_time||0) > 1) {
1671 # At most do this every second
1672 $last_time = time;
1673 if($Global::max_procs_file) {
1674 # --jobs filename
1675 my $mtime = (stat($Global::max_procs_file))[9];
1676 if($mtime > $Global::max_procs_file_last_mod) {
1677 # file changed: Force re-computing max_jobs_running
1678 $Global::max_procs_file_last_mod = $mtime;
1679 for my $sshlogin (values %Global::host) {
1680 $sshlogin->set_max_jobs_running(undef);
1681 }
1682 }
1683 }
1684 if(@opt::sshloginfile) {
1685 # Is --sshloginfile changed?
1686 for my $slf (@opt::sshloginfile) {
1687 my $actual_file = expand_slf_shorthand($slf);
1688 my $mtime = (stat($actual_file))[9];
1689 $last_mtime{$actual_file} ||= $mtime;
1690 if($mtime - $last_mtime{$actual_file} > 1) {
1691 ::debug("run","--sshloginfile $actual_file changed. reload\n");
1692 $last_mtime{$actual_file} = $mtime;
1693 # Reload $slf
1694 # Empty sshlogins
1695 @Global::sshlogin = ();
1696 for (values %Global::host) {
1697 # Don't start new jobs on any host
1698 # except the ones added back later
1699 $_->set_max_jobs_running(0);
1700 }
1701 # This will set max_jobs_running on the SSHlogins
1702 read_sshloginfile($actual_file);
1703 parse_sshlogin();
1704 $opt::filter_hosts and filter_hosts();
1705 setup_basefile();
1706 }
1707 }
1708 }
1709 }
1710 do {
1711 $jobs_started_this_round = 0;
1712 # This will start 1 job on each --sshlogin (if possible)
1713 # thus distribute the jobs on the --sshlogins round robin
1714
1715 for my $sshlogin (values %Global::host) {
1716 if($Global::JobQueue->empty() and not $opt::pipe) {
1717 # No more jobs in the queue
1718 last;
1719 }
1720 debug("run", "Running jobs before on ", $sshlogin->string(), ": ",
1721 $sshlogin->jobs_running(), "\n");
1722 if ($sshlogin->jobs_running() < $sshlogin->max_jobs_running()) {
1723 if($opt::load and $sshlogin->loadavg_too_high()) {
1724 # The load is too high or unknown
1725 next;
1726 }
1727 if($opt::noswap and $sshlogin->swapping()) {
1728 # The server is swapping
1729 next;
1730 }
1731 if($sshlogin->too_fast_remote_login()) {
1732 # It has been too short since
1733 next;
1734 }
1735 if($opt::delay and $opt::delay > ::now() - $Global::newest_starttime) {
1736 # It has been too short since last start
1737 next;
1738 }
1739 debug("run", $sshlogin->string(), " has ", $sshlogin->jobs_running(),
1740 " out of ", $sshlogin->max_jobs_running(),
1741 " jobs running. Start another.\n");
1742 if(start_another_job($sshlogin) == 0) {
1743 # No more jobs to start on this $sshlogin
1744 debug("run","No jobs started on ", $sshlogin->string(), "\n");
1745 next;
1746 }
1747 $sshlogin->inc_jobs_running();
1748 $sshlogin->set_last_login_at(::now());
1749 $jobs_started++;
1750 $jobs_started_this_round++;
1751 }
1752 debug("run","Running jobs after on ", $sshlogin->string(), ": ",
1753 $sshlogin->jobs_running(), " of ",
1754 $sshlogin->max_jobs_running(), "\n");
1755 }
1756 } while($jobs_started_this_round);
1757
1758 return $jobs_started;
1759 }
1760 }
1761
1762 {
1763 my $no_more_file_handles_warned;
1764
1765 sub start_another_job {
1766 # If there are enough filehandles
1767 # and JobQueue not empty
1768 # and not $job is in joblog
1769 # Then grab a job from Global::JobQueue,
1770 # start it at sshlogin
1771 # mark it as virgin_job
1772 # Inputs:
1773 # $sshlogin = the SSHLogin to start the job on
1774 # Uses:
1775 # $Global::JobQueue
1776 # $opt::pipe
1777 # $opt::results
1778 # $opt::resume
1779 # @Global::virgin_jobs
1780 # Returns:
1781 # 1 if another jobs was started
1782 # 0 otherwise
1783 my $sshlogin = shift;
1784 # Do we have enough file handles to start another job?
1785 if(enough_file_handles()) {
1786 if($Global::JobQueue->empty() and not $opt::pipe) {
1787 # No more commands to run
1788 debug("start", "Not starting: JobQueue empty\n");
1789 return 0;
1790 } else {
1791 my $job;
1792 # Skip jobs already in job log
1793 # Skip jobs already in results
1794 do {
1795 $job = get_job_with_sshlogin($sshlogin);
1796 if(not defined $job) {
1797 # No command available for that sshlogin
1798 debug("start", "Not starting: no jobs available for ",
1799 $sshlogin->string(), "\n");
1800 return 0;
1801 }
1802 } while ($job->is_already_in_joblog()
1803 or
1804 ($opt::results and $opt::resume and $job->is_already_in_results()));
1805 debug("start", "Command to run on '", $job->sshlogin()->string(), "': '",
1806 $job->replaced(),"'\n");
1807 if($job->start()) {
1808 if($opt::pipe) {
1809 push(@Global::virgin_jobs,$job);
1810 }
1811 debug("start", "Started as seq ", $job->seq(),
1812 " pid:", $job->pid(), "\n");
1813 return 1;
1814 } else {
1815 # Not enough processes to run the job.
1816 # Put it back on the queue.
1817 $Global::JobQueue->unget($job);
1818 # Count down the number of jobs to run for this SSHLogin.
1819 my $max = $sshlogin->max_jobs_running();
1820 if($max > 1) { $max--; } else {
1821 ::error("No more processes: cannot run a single job. Something is wrong.\n");
1822 ::wait_and_exit(255);
1823 }
1824 $sshlogin->set_max_jobs_running($max);
1825 # Sleep up to 300 ms to give other processes time to die
1826 ::usleep(rand()*300);
1827 ::warning("No more processes: ",
1828 "Decreasing number of running jobs to $max. ",
1829 "Raising ulimit -u or /etc/security/limits.conf may help.\n");
1830 return 0;
1831 }
1832 }
1833 } else {
1834 # No more file handles
1835 $no_more_file_handles_warned++ or
1836 ::warning("No more file handles. ",
1837 "Raising ulimit -n or /etc/security/limits.conf may help.\n");
1838 return 0;
1839 }
1840 }
1841 }
1842
1843 sub init_progress {
1844 # Uses:
1845 # $opt::bar
1846 # Returns:
1847 # list of computers for progress output
1848 $|=1;
1849 if($opt::bar) {
1850 return("","");
1851 }
1852 my %progress = progress();
1853 return ("\nComputers / CPU cores / Max jobs to run\n",
1854 $progress{'workerlist'});
1855 }
1856
1857 sub drain_job_queue {
1858 # Uses:
1859 # $opt::progress
1860 # $Global::original_stderr
1861 # $Global::total_running
1862 # $Global::max_jobs_running
1863 # %Global::running
1864 # $Global::JobQueue
1865 # %Global::host
1866 # $Global::start_no_new_jobs
1867 # Returns: N/A
1868 if($opt::progress) {
1869 print $Global::original_stderr init_progress();
1870 }
1871 my $last_header="";
1872 my $sleep = 0.2;
1873 do {
1874 while($Global::total_running > 0) {
1875 debug($Global::total_running, "==", scalar
1876 keys %Global::running," slots: ", $Global::max_jobs_running);
1877 if($opt::pipe) {
1878 # When using --pipe sometimes file handles are not closed properly
1879 for my $job (values %Global::running) {
1880 close $job->fh(0,"w");
1881 }
1882 }
1883 if($opt::progress) {
1884 my %progress = progress();
1885 if($last_header ne $progress{'header'}) {
1886 print $Global::original_stderr "\n", $progress{'header'}, "\n";
1887 $last_header = $progress{'header'};
1888 }
1889 print $Global::original_stderr "\r",$progress{'status'};
1890 flush $Global::original_stderr;
1891 }
1892 if($Global::total_running < $Global::max_jobs_running
1893 and not $Global::JobQueue->empty()) {
1894 # These jobs may not be started because of loadavg
1895 # or too little time between each ssh login.
1896 if(start_more_jobs() > 0) {
1897 # Exponential back-on if jobs were started
1898 $sleep = $sleep/2+0.001;
1899 }
1900 }
1901 # Sometimes SIGCHLD is not registered, so force reaper
1902 $sleep = ::reap_usleep($sleep);
1903 }
1904 if(not $Global::JobQueue->empty()) {
1905 # These jobs may not be started:
1906 # * because there the --filter-hosts has removed all
1907 if(not %Global::host) {
1908 ::error("There are no hosts left to run on.\n");
1909 ::wait_and_exit(255);
1910 }
1911 # * because of loadavg
1912 # * because of too little time between each ssh login.
1913 start_more_jobs();
1914 $sleep = ::reap_usleep($sleep);
1915 if($Global::max_jobs_running == 0) {
1916 ::warning("There are no job slots available. Increase --jobs.\n");
1917 }
1918 }
1919 } while ($Global::total_running > 0
1920 or
1921 not $Global::start_no_new_jobs and not $Global::JobQueue->empty());
1922 if($opt::progress) {
1923 my %progress = progress();
1924 print $Global::original_stderr "\r", $progress{'status'}, "\n";
1925 flush $Global::original_stderr;
1926 }
1927 }
1928
1929 sub toggle_progress {
1930 # Turn on/off progress view
1931 # Uses:
1932 # $opt::progress
1933 # $Global::original_stderr
1934 # Returns: N/A
1935 $opt::progress = not $opt::progress;
1936 if($opt::progress) {
1937 print $Global::original_stderr init_progress();
1938 }
1939 }
1940
1941 sub progress {
1942 # Uses:
1943 # $opt::bar
1944 # $opt::eta
1945 # %Global::host
1946 # $Global::total_started
1947 # Returns:
1948 # $workerlist = list of workers
1949 # $header = that will fit on the screen
1950 # $status = message that will fit on the screen
1951 if($opt::bar) {
1952 return ("workerlist" => "", "header" => "", "status" => bar());
1953 }
1954 my $eta = "";
1955 my ($status,$header)=("","");
1956 if($opt::eta) {
1957 my($total, $completed, $left, $pctcomplete, $avgtime, $this_eta) =
1958 compute_eta();
1959 $eta = sprintf("ETA: %ds Left: %d AVG: %.2fs ",
1960 $this_eta, $left, $avgtime);
1961 }
1962 my $termcols = terminal_columns();
1963 my @workers = sort keys %Global::host;
1964 my %sshlogin = map { $_ eq ":" ? ($_=>"local") : ($_=>$_) } @workers;
1965 my $workerno = 1;
1966 my %workerno = map { ($_=>$workerno++) } @workers;
1967 my $workerlist = "";
1968 for my $w (@workers) {
1969 $workerlist .=
1970 $workerno{$w}.":".$sshlogin{$w} ." / ".
1971 ($Global::host{$w}->ncpus() || "-")." / ".
1972 $Global::host{$w}->max_jobs_running()."\n";
1973 }
1974 $status = "x"x($termcols+1);
1975 if(length $status > $termcols) {
1976 # sshlogin1:XX/XX/XX%/XX.Xs sshlogin2:XX/XX/XX%/XX.Xs sshlogin3:XX/XX/XX%/XX.Xs
1977 $header = "Computer:jobs running/jobs completed/%of started jobs/Average seconds to complete";
1978 $status = $eta .
1979 join(" ",map
1980 {
1981 if($Global::total_started) {
1982 my $completed = ($Global::host{$_}->jobs_completed()||0);
1983 my $running = $Global::host{$_}->jobs_running();
1984 my $time = $completed ? (time-$^T)/($completed) : "0";
1985 sprintf("%s:%d/%d/%d%%/%.1fs ",
1986 $sshlogin{$_}, $running, $completed,
1987 ($running+$completed)*100
1988 / $Global::total_started, $time);
1989 }
1990 } @workers);
1991 }
1992 if(length $status > $termcols) {
1993 # 1:XX/XX/XX%/XX.Xs 2:XX/XX/XX%/XX.Xs 3:XX/XX/XX%/XX.Xs 4:XX/XX/XX%/XX.Xs
1994 $header = "Computer:jobs running/jobs completed/%of started jobs";
1995 $status = $eta .
1996 join(" ",map
1997 {
1998 my $completed = ($Global::host{$_}->jobs_completed()||0);
1999 my $running = $Global::host{$_}->jobs_running();
2000 my $time = $completed ? (time-$^T)/($completed) : "0";
2001 sprintf("%s:%d/%d/%d%%/%.1fs ",
2002 $workerno{$_}, $running, $completed,
2003 ($running+$completed)*100
2004 / $Global::total_started, $time);
2005 } @workers);
2006 }
2007 if(length $status > $termcols) {
2008 # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX/XX%
2009 $header = "Computer:jobs running/jobs completed/%of started jobs";
2010 $status = $eta .
2011 join(" ",map
2012 { sprintf("%s:%d/%d/%d%%",
2013 $sshlogin{$_},
2014 $Global::host{$_}->jobs_running(),
2015 ($Global::host{$_}->jobs_completed()||0),
2016 ($Global::host{$_}->jobs_running()+
2017 ($Global::host{$_}->jobs_completed()||0))*100
2018 / $Global::total_started) }
2019 @workers);
2020 }
2021 if(length $status > $termcols) {
2022 # 1:XX/XX/XX% 2:XX/XX/XX% 3:XX/XX/XX% 4:XX/XX/XX% 5:XX/XX/XX% 6:XX/XX/XX%
2023 $header = "Computer:jobs running/jobs completed/%of started jobs";
2024 $status = $eta .
2025 join(" ",map
2026 { sprintf("%s:%d/%d/%d%%",
2027 $workerno{$_},
2028 $Global::host{$_}->jobs_running(),
2029 ($Global::host{$_}->jobs_completed()||0),
2030 ($Global::host{$_}->jobs_running()+
2031 ($Global::host{$_}->jobs_completed()||0))*100
2032 / $Global::total_started) }
2033 @workers);
2034 }
2035 if(length $status > $termcols) {
2036 # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX sshlogin4:XX/XX
2037 $header = "Computer:jobs running/jobs completed";
2038 $status = $eta .
2039 join(" ",map
2040 { sprintf("%s:%d/%d",
2041 $sshlogin{$_}, $Global::host{$_}->jobs_running(),
2042 ($Global::host{$_}->jobs_completed()||0)) }
2043 @workers);
2044 }
2045 if(length $status > $termcols) {
2046 # sshlogin1:XX/XX sshlogin2:XX/XX sshlogin3:XX/XX sshlogin4:XX/XX
2047 $header = "Computer:jobs running/jobs completed";
2048 $status = $eta .
2049 join(" ",map
2050 { sprintf("%s:%d/%d",
2051 $sshlogin{$_}, $Global::host{$_}->jobs_running(),
2052 ($Global::host{$_}->jobs_completed()||0)) }
2053 @workers);
2054 }
2055 if(length $status > $termcols) {
2056 # 1:XX/XX 2:XX/XX 3:XX/XX 4:XX/XX 5:XX/XX 6:XX/XX
2057 $header = "Computer:jobs running/jobs completed";
2058 $status = $eta .
2059 join(" ",map
2060 { sprintf("%s:%d/%d",
2061 $workerno{$_}, $Global::host{$_}->jobs_running(),
2062 ($Global::host{$_}->jobs_completed()||0)) }
2063 @workers);
2064 }
2065 if(length $status > $termcols) {
2066 # sshlogin1:XX sshlogin2:XX sshlogin3:XX sshlogin4:XX sshlogin5:XX
2067 $header = "Computer:jobs completed";
2068 $status = $eta .
2069 join(" ",map
2070 { sprintf("%s:%d",
2071 $sshlogin{$_},
2072 ($Global::host{$_}->jobs_completed()||0)) }
2073 @workers);
2074 }
2075 if(length $status > $termcols) {
2076 # 1:XX 2:XX 3:XX 4:XX 5:XX 6:XX
2077 $header = "Computer:jobs completed";
2078 $status = $eta .
2079 join(" ",map
2080 { sprintf("%s:%d",
2081 $workerno{$_},
2082 ($Global::host{$_}->jobs_completed()||0)) }
2083 @workers);
2084 }
2085 return ("workerlist" => $workerlist, "header" => $header, "status" => $status);
2086 }
2087
2088 {
2089 my ($total, $first_completed, $smoothed_avg_time);
2090
2091 sub compute_eta {
2092 # Calculate important numbers for ETA
2093 # Returns:
2094 # $total = number of jobs in total
2095 # $completed = number of jobs completed
2096 # $left = number of jobs left
2097 # $pctcomplete = percent of jobs completed
2098 # $avgtime = averaged time
2099 # $eta = smoothed eta
2100 $total ||= $Global::JobQueue->total_jobs();
2101 my $completed = 0;
2102 for(values %Global::host) { $completed += $_->jobs_completed() }
2103 my $left = $total - $completed;
2104 if(not $completed) {
2105 return($total, $completed, $left, 0, 0, 0);
2106 }
2107 my $pctcomplete = $completed / $total;
2108 $first_completed ||= time;
2109 my $timepassed = (time - $first_completed);
2110 my $avgtime = $timepassed / $completed;
2111 $smoothed_avg_time ||= $avgtime;
2112 # Smooth the eta so it does not jump wildly
2113 $smoothed_avg_time = (1 - $pctcomplete) * $smoothed_avg_time +
2114 $pctcomplete * $avgtime;
2115 my $eta = int($left * $smoothed_avg_time);
2116 return($total, $completed, $left, $pctcomplete, $avgtime, $eta);
2117 }
2118 }
2119
2120 {
2121 my ($rev,$reset);
2122
2123 sub bar {
2124 # Return:
2125 # $status = bar with eta, completed jobs, arg and pct
2126 $rev ||= "\033[7m";
2127 $reset ||= "\033[0m";
2128 my($total, $completed, $left, $pctcomplete, $avgtime, $eta) =
2129 compute_eta();
2130 my $arg = $Global::newest_job ?
2131 $Global::newest_job->{'commandline'}->replace_placeholders(["\257<\257>"],0,0) : "";
2132 # These chars mess up display in the terminal
2133 $arg =~ tr/[\011-\016\033\302-\365]//d;
2134 my $bar_text =
2135 sprintf("%d%% %d:%d=%ds %s",
2136 $pctcomplete*100, $completed, $left, $eta, $arg);
2137 my $terminal_width = terminal_columns();
2138 my $s = sprintf("%-${terminal_width}s",
2139 substr($bar_text." "x$terminal_width,
2140 0,$terminal_width));
2141 my $width = int($terminal_width * $pctcomplete);
2142 substr($s,$width,0) = $reset;
2143 my $zenity = sprintf("%-${terminal_width}s",
2144 substr("# $eta sec $arg",
2145 0,$terminal_width));
2146 $s = "\r" . $zenity . "\r" . $pctcomplete*100 . # Prefix with zenity header
2147 "\r" . $rev . $s . $reset;
2148 return $s;
2149 }
2150 }
2151
2152 {
2153 my ($columns,$last_column_time);
2154
2155 sub terminal_columns {
2156 # Get the number of columns of the display
2157 # Returns:
2158 # number of columns of the screen
2159 if(not $columns or $last_column_time < time) {
2160 $last_column_time = time;
2161 $columns = $ENV{'COLUMNS'};
2162 if(not $columns) {
2163 my $resize = qx{ resize 2>/dev/null };
2164 $resize =~ /COLUMNS=(\d+);/ and do { $columns = $1; };
2165 }
2166 $columns ||= 80;
2167 }
2168 return $columns;
2169 }
2170 }
2171
2172 sub get_job_with_sshlogin {
2173 # Returns:
2174 # next job object for $sshlogin if any available
2175 my $sshlogin = shift;
2176 my $job = undef;
2177
2178 if ($opt::hostgroups) {
2179 my @other_hostgroup_jobs = ();
2180
2181 while($job = $Global::JobQueue->get()) {
2182 if($sshlogin->in_hostgroups($job->hostgroups())) {
2183 # Found a job for this hostgroup
2184 last;
2185 } else {
2186 # This job was not in the hostgroups of $sshlogin
2187 push @other_hostgroup_jobs, $job;
2188 }
2189 }
2190 $Global::JobQueue->unget(@other_hostgroup_jobs);
2191 if(not defined $job) {
2192 # No more jobs
2193 return undef;
2194 }
2195 } else {
2196 $job = $Global::JobQueue->get();
2197 if(not defined $job) {
2198 # No more jobs
2199 ::debug("start", "No more jobs: JobQueue empty\n");
2200 return undef;
2201 }
2202 }
2203
2204 my $clean_command = $job->replaced();
2205 if($clean_command =~ /^\s*$/) {
2206 # Do not run empty lines
2207 if(not $Global::JobQueue->empty()) {
2208 return get_job_with_sshlogin($sshlogin);
2209 } else {
2210 return undef;
2211 }
2212 }
2213 $job->set_sshlogin($sshlogin);
2214 if($opt::retries and $clean_command and
2215 $job->failed_here()) {
2216 # This command with these args failed for this sshlogin
2217 my ($no_of_failed_sshlogins,$min_failures) = $job->min_failed();
2218 # Only look at the Global::host that have > 0 jobslots
2219 if($no_of_failed_sshlogins == grep { $_->max_jobs_running() > 0 } values %Global::host
2220 and $job->failed_here() == $min_failures) {
2221 # It failed the same or more times on another host:
2222 # run it on this host
2223 } else {
2224 # If it failed fewer times on another host:
2225 # Find another job to run
2226 my $nextjob;
2227 if(not $Global::JobQueue->empty()) {
2228 # This can potentially recurse for all args
2229 no warnings 'recursion';
2230 $nextjob = get_job_with_sshlogin($sshlogin);
2231 }
2232 # Push the command back on the queue
2233 $Global::JobQueue->unget($job);
2234 return $nextjob;
2235 }
2236 }
2237 return $job;
2238 }
2239
2240 sub __REMOTE_SSH__ {}
2241
2242 sub read_sshloginfiles {
2243 # Returns: N/A
2244 for my $s (@_) {
2245 read_sshloginfile(expand_slf_shorthand($s));
2246 }
2247 }
2248
2249 sub expand_slf_shorthand {
2250 my $file = shift;
2251 if($file eq "-") {
2252 # skip: It is stdin
2253 } elsif($file eq "..") {
2254 $file = $ENV{'HOME'}."/.parallel/sshloginfile";
2255 } elsif($file eq ".") {
2256 $file = "/etc/parallel/sshloginfile";
2257 } elsif(not -r $file) {
2258 if(not -r $ENV{'HOME'}."/.parallel/".$file) {
2259 # Try prepending ~/.parallel
2260 ::error("Cannot open $file.\n");
2261 ::wait_and_exit(255);
2262 } else {
2263 $file = $ENV{'HOME'}."/.parallel/".$file;
2264 }
2265 }
2266 return $file;
2267 }
2268
2269 sub read_sshloginfile {
2270 # Returns: N/A
2271 my $file = shift;
2272 my $close = 1;
2273 my $in_fh;
2274 ::debug("init","--slf ",$file);
2275 if($file eq "-") {
2276 $in_fh = *STDIN;
2277 $close = 0;
2278 } else {
2279 if(not open($in_fh, "<", $file)) {
2280 # Try the filename
2281 ::error("Cannot open $file.\n");
2282 ::wait_and_exit(255);
2283 }
2284 }
2285 while(<$in_fh>) {
2286 chomp;
2287 /^\s*#/ and next;
2288 /^\s*$/ and next;
2289 push @Global::sshlogin, $_;
2290 }
2291 if($close) {
2292 close $in_fh;
2293 }
2294 }
2295
2296 sub parse_sshlogin {
2297 # Returns: N/A
2298 my @login;
2299 if(not @Global::sshlogin) { @Global::sshlogin = (":"); }
2300 for my $sshlogin (@Global::sshlogin) {
2301 # Split up -S sshlogin,sshlogin
2302 for my $s (split /,/, $sshlogin) {
2303 if ($s eq ".." or $s eq "-") {
2304 # This may add to @Global::sshlogin - possibly bug
2305 read_sshloginfile(expand_slf_shorthand($s));
2306 } else {
2307 push (@login, $s);
2308 }
2309 }
2310 }
2311 $Global::minimal_command_line_length = 8_000_000;
2312 my @allowed_hostgroups;
2313 for my $ncpu_sshlogin_string (::uniq(@login)) {
2314 my $sshlogin = SSHLogin->new($ncpu_sshlogin_string);
2315 my $sshlogin_string = $sshlogin->string();
2316 if($sshlogin_string eq "") {
2317 # This is an ssh group: -S @webservers
2318 push @allowed_hostgroups, $sshlogin->hostgroups();
2319 next;
2320 }
2321 if($Global::host{$sshlogin_string}) {
2322 # This sshlogin has already been added:
2323 # It is probably a host that has come back
2324 # Set the max_jobs_running back to the original
2325 debug("run","Already seen $sshlogin_string\n");
2326 if($sshlogin->{'ncpus'}) {
2327 # If ncpus set by '#/' of the sshlogin, overwrite it:
2328 $Global::host{$sshlogin_string}->set_ncpus($sshlogin->ncpus());
2329 }
2330 $Global::host{$sshlogin_string}->set_max_jobs_running(undef);
2331 next;
2332 }
2333 if($sshlogin_string eq ":") {
2334 $sshlogin->set_maxlength(Limits::Command::max_length());
2335 } else {
2336 # If all chars needs to be quoted, every other character will be \
2337 $sshlogin->set_maxlength(int(Limits::Command::max_length()/2));
2338 }
2339 $Global::minimal_command_line_length =
2340 ::min($Global::minimal_command_line_length, $sshlogin->maxlength());
2341 $Global::host{$sshlogin_string} = $sshlogin;
2342 }
2343 if(@allowed_hostgroups) {
2344 # Remove hosts that are not in these groups
2345 while (my ($string, $sshlogin) = each %Global::host) {
2346 if(not $sshlogin->in_hostgroups(@allowed_hostgroups)) {
2347 delete $Global::host{$string};
2348 }
2349 }
2350 }
2351
2352 # debug("start", "sshlogin: ", my_dump(%Global::host),"\n");
2353 if($opt::transfer or @opt::return or $opt::cleanup or @opt::basefile) {
2354 if(not remote_hosts()) {
2355 # There are no remote hosts
2356 if(@opt::trc) {
2357 ::warning("--trc ignored as there are no remote --sshlogin.\n");
2358 } elsif (defined $opt::transfer) {
2359 ::warning("--transfer ignored as there are no remote --sshlogin.\n");
2360 } elsif (@opt::return) {
2361 ::warning("--return ignored as there are no remote --sshlogin.\n");
2362 } elsif (defined $opt::cleanup) {
2363 ::warning("--cleanup ignored as there are no remote --sshlogin.\n");
2364 } elsif (@opt::basefile) {
2365 ::warning("--basefile ignored as there are no remote --sshlogin.\n");
2366 }
2367 }
2368 }
2369 }
2370
2371 sub remote_hosts {
2372 # Return sshlogins that are not ':'
2373 # Returns:
2374 # list of sshlogins with ':' removed
2375 return grep !/^:$/, keys %Global::host;
2376 }
2377
2378 sub setup_basefile {
2379 # Transfer basefiles to each $sshlogin
2380 # This needs to be done before first jobs on $sshlogin is run
2381 # Returns: N/A
2382 my $cmd = "";
2383 my $rsync_destdir;
2384 my $workdir;
2385 for my $sshlogin (values %Global::host) {
2386 if($sshlogin->string() eq ":") { next }
2387 for my $file (@opt::basefile) {
2388 if($file !~ m:^/: and $opt::workdir eq "...") {
2389 ::error("Work dir '...' will not work with relative basefiles\n");
2390 ::wait_and_exit(255);
2391 }
2392 $workdir ||= Job->new("")->workdir();
2393 $cmd .= $sshlogin->rsync_transfer_cmd($file,$workdir) . "&";
2394 }
2395 }
2396 $cmd .= "wait;";
2397 debug("init", "basesetup: $cmd\n");
2398 print `$cmd`;
2399 }
2400
2401 sub cleanup_basefile {
2402 # Remove the basefiles transferred
2403 # Returns: N/A
2404 my $cmd="";
2405 my $workdir = Job->new("")->workdir();
2406 for my $sshlogin (values %Global::host) {
2407 if($sshlogin->string() eq ":") { next }
2408 for my $file (@opt::basefile) {
2409 $cmd .= $sshlogin->cleanup_cmd($file,$workdir)."&";
2410 }
2411 }
2412 $cmd .= "wait;";
2413 debug("init", "basecleanup: $cmd\n");
2414 print `$cmd`;
2415 }
2416
2417 sub filter_hosts {
2418 my(@cores, @cpus, @maxline, @echo);
2419 my $envvar = ::shell_quote_scalar($Global::envvar);
2420 while (my ($host, $sshlogin) = each %Global::host) {
2421 if($host eq ":") { next }
2422 # The 'true' is used to get the $host out later
2423 my $sshcmd = "true $host;" . $sshlogin->sshcommand()." ".$sshlogin->serverlogin();
2424 push(@cores, $host."\t".$sshcmd." ".$envvar." parallel --number-of-cores\n\0");
2425 push(@cpus, $host."\t".$sshcmd." ".$envvar." parallel --number-of-cpus\n\0");
2426 push(@maxline, $host."\t".$sshcmd." ".$envvar." parallel --max-line-length-allowed\n\0");
2427 # 'echo' is used to get the best possible value for an ssh login time
2428 push(@echo, $host."\t".$sshcmd." echo\n\0");
2429 }
2430 my ($fh, $tmpfile) = ::tmpfile(SUFFIX => ".ssh");
2431 print $fh @cores, @cpus, @maxline, @echo;
2432 close $fh;
2433 # --timeout 5: Setting up an SSH connection and running a simple
2434 # command should never take > 5 sec.
2435 # --delay 0.1: If multiple sshlogins use the same proxy the delay
2436 # will make it less likely to overload the ssh daemon.
2437 # --retries 3: If the ssh daemon it overloaded, try 3 times
2438 # -s 16000: Half of the max line on UnixWare
2439 my $cmd = "cat $tmpfile | $0 -j0 --timeout 5 -s 16000 --joblog - --plain --delay 0.1 --retries 3 --tag --tagstring {1} -0 --colsep '\t' -k eval {2} 2>/dev/null";
2440 ::debug("init", $cmd, "\n");
2441 open(my $host_fh, "-|", $cmd) || ::die_bug("parallel host check: $cmd");
2442 my (%ncores, %ncpus, %time_to_login, %maxlen, %echo, @down_hosts);
2443 my $prepend = "";
2444 while(<$host_fh>) {
2445 if(/\'$/) {
2446 # if last char = ' then append next line
2447 # This may be due to quoting of $Global::envvar
2448 $prepend .= $_;
2449 next;
2450 }
2451 $_ = $prepend . $_;
2452 $prepend = "";
2453 chomp;
2454 my @col = split /\t/, $_;
2455 if(defined $col[6]) {
2456 # This is a line from --joblog
2457 # seq host time spent sent received exit signal command
2458 # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ parallel\ --number-of-cores
2459 if($col[0] eq "Seq" and $col[1] eq "Host" and
2460 $col[2] eq "Starttime") {
2461 # Header => skip
2462 next;
2463 }
2464 # Get server from: eval true server\;
2465 $col[8] =~ /eval true..([^;]+).;/ or ::die_bug("col8 does not contain host: $col[8]");
2466 my $host = $1;
2467 $host =~ tr/\\//d;
2468 $Global::host{$host} or next;
2469 if($col[6] eq "255" or $col[7] eq "15") {
2470 # exit == 255 or signal == 15: ssh failed
2471 # Remove sshlogin
2472 ::debug("init", "--filtered $host\n");
2473 push(@down_hosts, $host);
2474 @down_hosts = uniq(@down_hosts);
2475 } elsif($col[6] eq "127") {
2476 # signal == 127: parallel not installed remote
2477 # Set ncpus and ncores = 1
2478 ::warning("Could not figure out ",
2479 "number of cpus on $host. Using 1.\n");
2480 $ncores{$host} = 1;
2481 $ncpus{$host} = 1;
2482 $maxlen{$host} = Limits::Command::max_length();
2483 } elsif($col[0] =~ /^\d+$/ and $Global::host{$host}) {
2484 # Remember how log it took to log in
2485 # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ echo
2486 $time_to_login{$host} = ::min($time_to_login{$host},$col[3]);
2487 } else {
2488 ::die_bug("host check unmatched long jobline: $_");
2489 }
2490 } elsif($Global::host{$col[0]}) {
2491 # This output from --number-of-cores, --number-of-cpus,
2492 # --max-line-length-allowed
2493 # ncores: server 8
2494 # ncpus: server 2
2495 # maxlen: server 131071
2496 if(not $ncores{$col[0]}) {
2497 $ncores{$col[0]} = $col[1];
2498 } elsif(not $ncpus{$col[0]}) {
2499 $ncpus{$col[0]} = $col[1];
2500 } elsif(not $maxlen{$col[0]}) {
2501 $maxlen{$col[0]} = $col[1];
2502 } elsif(not $echo{$col[0]}) {
2503 $echo{$col[0]} = $col[1];
2504 } elsif(m/perl: warning:|LANGUAGE =|LC_ALL =|LANG =|are supported and installed/) {
2505 # Skip these:
2506 # perl: warning: Setting locale failed.
2507 # perl: warning: Please check that your locale settings:
2508 # LANGUAGE = (unset),
2509 # LC_ALL = (unset),
2510 # LANG = "en_US.UTF-8"
2511 # are supported and installed on your system.
2512 # perl: warning: Falling back to the standard locale ("C").
2513 } else {
2514 ::die_bug("host check too many col0: $_");
2515 }
2516 } else {
2517 ::die_bug("host check unmatched short jobline ($col[0]): $_");
2518 }
2519 }
2520 close $host_fh;
2521 $Global::debug or unlink $tmpfile;
2522 delete @Global::host{@down_hosts};
2523 @down_hosts and ::warning("Removed @down_hosts\n");
2524 $Global::minimal_command_line_length = 8_000_000;
2525 while (my ($sshlogin, $obj) = each %Global::host) {
2526 if($sshlogin eq ":") { next }
2527 $ncpus{$sshlogin} or ::die_bug("ncpus missing: ".$obj->serverlogin());
2528 $ncores{$sshlogin} or ::die_bug("ncores missing: ".$obj->serverlogin());
2529 $time_to_login{$sshlogin} or ::die_bug("time_to_login missing: ".$obj->serverlogin());
2530 $maxlen{$sshlogin} or ::die_bug("maxlen missing: ".$obj->serverlogin());
2531 if($opt::use_cpus_instead_of_cores) {
2532 $obj->set_ncpus($ncpus{$sshlogin});
2533 } else {
2534 $obj->set_ncpus($ncores{$sshlogin});
2535 }
2536 $obj->set_time_to_login($time_to_login{$sshlogin});
2537 $obj->set_maxlength($maxlen{$sshlogin});
2538 $Global::minimal_command_line_length =
2539 ::min($Global::minimal_command_line_length,
2540 int($maxlen{$sshlogin}/2));
2541 ::debug("init", "Timing from -S:$sshlogin ncpus:",$ncpus{$sshlogin},
2542 " ncores:", $ncores{$sshlogin},
2543 " time_to_login:", $time_to_login{$sshlogin},
2544 " maxlen:", $maxlen{$sshlogin},
2545 " min_max_len:", $Global::minimal_command_line_length,"\n");
2546 }
2547 }
2548
2549 sub onall {
2550 sub tmp_joblog {
2551 my $joblog = shift;
2552 if(not defined $joblog) {
2553 return undef;
2554 }
2555 my ($fh, $tmpfile) = ::tmpfile(SUFFIX => ".log");
2556 close $fh;
2557 return $tmpfile;
2558 }
2559 my @command = @_;
2560 if($Global::quoting) {
2561 @command = shell_quote_empty(@command);
2562 }
2563
2564 # Copy all @fhlist into tempfiles
2565 my @argfiles = ();
2566 for my $fh (@fhlist) {
2567 my ($outfh, $name) = ::tmpfile(SUFFIX => ".all", UNLINK => 1);
2568 print $outfh (<$fh>);
2569 close $outfh;
2570 push @argfiles, $name;
2571 }
2572 if(@opt::basefile) { setup_basefile(); }
2573 # for each sshlogin do:
2574 # parallel -S $sshlogin $command :::: @argfiles
2575 #
2576 # Pass some of the options to the sub-parallels, not all of them as
2577 # -P should only go to the first, and -S should not be copied at all.
2578 my $options =
2579 join(" ",
2580 ((defined $opt::jobs) ? "-P $opt::jobs" : ""),
2581 ((defined $opt::linebuffer) ? "--linebuffer" : ""),
2582 ((defined $opt::ungroup) ? "-u" : ""),
2583 ((defined $opt::group) ? "-g" : ""),
2584 ((defined $opt::keeporder) ? "--keeporder" : ""),
2585 ((defined $opt::D) ? "-D $opt::D" : ""),
2586 ((defined $opt::plain) ? "--plain" : ""),
2587 ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""),
2588 );
2589 my $suboptions =
2590 join(" ",
2591 ((defined $opt::ungroup) ? "-u" : ""),
2592 ((defined $opt::linebuffer) ? "--linebuffer" : ""),
2593 ((defined $opt::group) ? "-g" : ""),
2594 ((defined $opt::files) ? "--files" : ""),
2595 ((defined $opt::keeporder) ? "--keeporder" : ""),
2596 ((defined $opt::colsep) ? "--colsep ".shell_quote($opt::colsep) : ""),
2597 ((@opt::v) ? "-vv" : ""),
2598 ((defined $opt::D) ? "-D $opt::D" : ""),
2599 ((defined $opt::timeout) ? "--timeout ".$opt::timeout : ""),
2600 ((defined $opt::plain) ? "--plain" : ""),
2601 ((defined $opt::retries) ? "--retries ".$opt::retries : ""),
2602 ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""),
2603 ((defined $opt::arg_sep) ? "--arg-sep ".$opt::arg_sep : ""),
2604 ((defined $opt::arg_file_sep) ? "--arg-file-sep ".$opt::arg_file_sep : ""),
2605 (@opt::env ? map { "--env ".::shell_quote_scalar($_) } @opt::env : ""),
2606 );
2607 ::debug("init", "| $0 $options\n");
2608 open(my $parallel_fh, "|-", "$0 --no-notice -j0 $options") ||
2609 ::die_bug("This does not run GNU Parallel: $0 $options");
2610 my @joblogs;
2611 for my $host (sort keys %Global::host) {
2612 my $sshlogin = $Global::host{$host};
2613 my $joblog = tmp_joblog($opt::joblog);
2614 if($joblog) {
2615 push @joblogs, $joblog;
2616 $joblog = "--joblog $joblog";
2617 }
2618 my $quad = $opt::arg_file_sep || "::::";
2619 ::debug("init", "$0 $suboptions -j1 $joblog ",
2620 ((defined $opt::tag) ?
2621 "--tagstring ".shell_quote_scalar($sshlogin->string()) : ""),
2622 " -S ", shell_quote_scalar($sshlogin->string())," ",
2623 join(" ",shell_quote(@command))," $quad @argfiles\n");
2624 print $parallel_fh "$0 $suboptions -j1 $joblog ",
2625 ((defined $opt::tag) ?
2626 "--tagstring ".shell_quote_scalar($sshlogin->string()) : ""),
2627 " -S ", shell_quote_scalar($sshlogin->string())," ",
2628 join(" ",shell_quote(@command))," $quad @argfiles\n";
2629 }
2630 close $parallel_fh;
2631 $Global::exitstatus = $? >> 8;
2632 debug("init", "--onall exitvalue ", $?);
2633 if(@opt::basefile) { cleanup_basefile(); }
2634 $Global::debug or unlink(@argfiles);
2635 my %seen;
2636 for my $joblog (@joblogs) {
2637 # Append to $joblog
2638 open(my $fh, "<", $joblog) || ::die_bug("Cannot open tmp joblog $joblog");
2639 # Skip first line (header);
2640 <$fh>;
2641 print $Global::joblog (<$fh>);
2642 close $fh;
2643 unlink($joblog);
2644 }
2645 }
2646
2647 sub __SIGNAL_HANDLING__ {}
2648
2649 sub save_original_signal_handler {
2650 # Remember the original signal handler
2651 # Returns: N/A
2652 $SIG{TERM} ||= sub { exit 0; }; # $SIG{TERM} is not set on Mac OS X
2653 $SIG{INT} = sub { if($opt::tmux) { qx { tmux kill-session -t p$$ }; }
2654 unlink keys %Global::unlink; exit -1 };
2655 $SIG{TERM} = sub { if($opt::tmux) { qx { tmux kill-session -t p$$ }; }
2656 unlink keys %Global::unlink; exit -1 };
2657 %Global::original_sig = %SIG;
2658 $SIG{TERM} = sub {}; # Dummy until jobs really start
2659 }
2660
2661 sub list_running_jobs {
2662 # Returns: N/A
2663 for my $v (values %Global::running) {
2664 print $Global::original_stderr "$Global::progname: ",$v->replaced(),"\n";
2665 }
2666 }
2667
2668 sub start_no_new_jobs {
2669 # Returns: N/A
2670 $SIG{TERM} = $Global::original_sig{TERM};
2671 print $Global::original_stderr
2672 ("$Global::progname: SIGTERM received. No new jobs will be started.\n",
2673 "$Global::progname: Waiting for these ", scalar(keys %Global::running),
2674 " jobs to finish. Send SIGTERM again to stop now.\n");
2675 list_running_jobs();
2676 $Global::start_no_new_jobs ||= 1;
2677 }
2678
2679 sub reaper {
2680 # A job finished.
2681 # Print the output.
2682 # Start another job
2683 # Returns: N/A
2684 my $stiff;
2685 my $children_reaped = 0;
2686 debug("run", "Reaper ");
2687 while (($stiff = waitpid(-1, &WNOHANG)) > 0) {
2688 $children_reaped++;
2689 if($Global::sshmaster{$stiff}) {
2690 # This is one of the ssh -M: ignore
2691 next;
2692 }
2693 my $job = $Global::running{$stiff};
2694 # '-a <(seq 10)' will give us a pid not in %Global::running
2695 $job or next;
2696 $job->set_exitstatus($? >> 8);
2697 $job->set_exitsignal($? & 127);
2698 debug("run", "died (", $job->exitstatus(), "): ", $job->seq());
2699 $job->set_endtime(::now());
2700 if($stiff == $Global::tty_taken) {
2701 # The process that died had the tty => release it
2702 $Global::tty_taken = 0;
2703 }
2704
2705 if(not $job->should_be_retried()) {
2706 # The job is done
2707 # Free the jobslot
2708 push @Global::slots, $job->slot();
2709 if($opt::timeout) {
2710 # Update average runtime for timeout
2711 $Global::timeoutq->update_delta_time($job->runtime());
2712 }
2713 # Force printing now if the job failed and we are going to exit
2714 my $print_now = ($opt::halt_on_error and $opt::halt_on_error == 2
2715 and $job->exitstatus());
2716 if($opt::keeporder and not $print_now) {
2717 print_earlier_jobs($job);
2718 } else {
2719 $job->print();
2720 }
2721 if($job->exitstatus()) {
2722 process_failed_job($job);
2723 }
2724
2725 }
2726 my $sshlogin = $job->sshlogin();
2727 $sshlogin->dec_jobs_running();
2728 $sshlogin->inc_jobs_completed();
2729 $Global::total_running--;
2730 delete $Global::running{$stiff};
2731 start_more_jobs();
2732 }
2733 debug("run", "done ");
2734 return $children_reaped;
2735 }
2736
2737 sub process_failed_job {
2738 # The jobs had a exit status <> 0, so error
2739 # Returns: N/A
2740 my $job = shift;
2741 $Global::exitstatus++;
2742 $Global::total_failed++;
2743 if($opt::halt_on_error) {
2744 if($opt::halt_on_error == 1
2745 or
2746 ($opt::halt_on_error < 1 and $Global::total_failed > 3
2747 and
2748 $Global::total_failed / $Global::total_started > $opt::halt_on_error)) {
2749 # If halt on error == 1 or --halt 10%
2750 # we should gracefully exit
2751 print $Global::original_stderr
2752 ("$Global::progname: Starting no more jobs. ",
2753 "Waiting for ", scalar(keys %Global::running),
2754 " jobs to finish. This job failed:\n",
2755 $job->replaced(),"\n");
2756 $Global::start_no_new_jobs ||= 1;
2757 $Global::halt_on_error_exitstatus = $job->exitstatus();
2758 } elsif($opt::halt_on_error == 2) {
2759 # If halt on error == 2 we should exit immediately
2760 print $Global::original_stderr
2761 ("$Global::progname: This job failed:\n",
2762 $job->replaced(),"\n");
2763 exit ($job->exitstatus());
2764 }
2765 }
2766 }
2767
2768 {
2769 my (%print_later,$job_end_sequence);
2770
2771 sub print_earlier_jobs {
2772 # Print jobs completed earlier
2773 # Returns: N/A
2774 my $job = shift;
2775 $print_later{$job->seq()} = $job;
2776 $job_end_sequence ||= 1;
2777 debug("run", "Looking for: $job_end_sequence ",
2778 "Current: ", $job->seq(), "\n");
2779 for(my $j = $print_later{$job_end_sequence};
2780 $j or vec($Global::job_already_run,$job_end_sequence,1);
2781 $job_end_sequence++,
2782 $j = $print_later{$job_end_sequence}) {
2783 debug("run", "Found job end $job_end_sequence");
2784 if($j) {
2785 $j->print();
2786 delete $print_later{$job_end_sequence};
2787 }
2788 }
2789 }
2790 }
2791
2792 sub __USAGE__ {}
2793
2794 sub wait_and_exit {
2795 # If we do not wait, we sometimes get segfault
2796 # Returns: N/A
2797 my $error = shift;
2798 if($error) {
2799 # Kill all without printing
2800 for my $job (values %Global::running) {
2801 $job->kill("TERM");
2802 $job->kill("TERM");
2803 }
2804 }
2805 for (keys %Global::unkilled_children) {
2806 kill 9, $_;
2807 waitpid($_,0);
2808 delete $Global::unkilled_children{$_};
2809 }
2810 wait();
2811 exit($error);
2812 }
2813
2814 sub die_usage {
2815 # Returns: N/A
2816 usage();
2817 wait_and_exit(255);
2818 }
2819
2820 sub usage {
2821 # Returns: N/A
2822 print join
2823 ("\n",
2824 "Usage:",
2825 "",
2826 "$Global::progname [options] [command [arguments]] < list_of_arguments",
2827 "$Global::progname [options] [command [arguments]] (::: arguments|:::: argfile(s))...",
2828 "cat ... | $Global::progname --pipe [options] [command [arguments]]",
2829 "",
2830 "-j n Run n jobs in parallel",
2831 "-k Keep same order",
2832 "-X Multiple arguments with context replace",
2833 "--colsep regexp Split input on regexp for positional replacements",
2834 "{} {.} {/} {/.} {#} {%} {= perl code =} Replacement strings",
2835 "{3} {3.} {3/} {3/.} {=3 perl code =} Positional replacement strings",
2836 "With --plus: {} = {+/}/{/} = {.}.{+.} = {+/}/{/.}.{+.} = {..}.{+..} =",
2837 " {+/}/{/..}.{+..} = {...}.{+...} = {+/}/{/...}.{+...}",
2838 "",
2839 "-S sshlogin Example: foo\@server.example.com",
2840 "--slf .. Use ~/.parallel/sshloginfile as the list of sshlogins",
2841 "--trc {}.bar Shorthand for --transfer --return {}.bar --cleanup",
2842 "--onall Run the given command with argument on all sshlogins",
2843 "--nonall Run the given command with no arguments on all sshlogins",
2844 "",
2845 "--pipe Split stdin (standard input) to multiple jobs.",
2846 "--recend str Record end separator for --pipe.",
2847 "--recstart str Record start separator for --pipe.",
2848 "",
2849 "See 'man $Global::progname' for details",
2850 "",
2851 "When using programs that use GNU Parallel to process data for publication please cite:",
2852 "",
2853 "O. Tange (2011): GNU Parallel - The Command-Line Power Tool,",
2854 ";login: The USENIX Magazine, February 2011:42-47.",
2855 "",
2856 "Or you can get GNU Parallel without this requirement by paying 10000 EUR.",
2857 "");
2858 }
2859
2860
2861 sub citation_notice {
2862 # if --no-notice or --plain: do nothing
2863 # if stderr redirected: do nothing
2864 # if ~/.parallel/will-cite: do nothing
2865 # else: print citation notice to stderr
2866 if($opt::no_notice
2867 or
2868 $opt::plain
2869 or
2870 not -t $Global::original_stderr
2871 or
2872 -e $ENV{'HOME'}."/.parallel/will-cite") {
2873 # skip
2874 } else {
2875 print $Global::original_stderr
2876 ("When using programs that use GNU Parallel to process data for publication please cite:\n",
2877 "\n",
2878 " O. Tange (2011): GNU Parallel - The Command-Line Power Tool,\n",
2879 " ;login: The USENIX Magazine, February 2011:42-47.\n",
2880 "\n",
2881 "This helps funding further development; and it won't cost you a cent.\n",
2882 "Or you can get GNU Parallel without this requirement by paying 10000 EUR.\n",
2883 "\n",
2884 "To silence this citation notice run 'parallel --bibtex' once or use '--no-notice'.\n\n",
2885 );
2886 flush $Global::original_stderr;
2887 }
2888 }
2889
2890
2891 sub warning {
2892 my @w = @_;
2893 my $fh = $Global::original_stderr || *STDERR;
2894 my $prog = $Global::progname || "parallel";
2895 print $fh $prog, ": Warning: ", @w;
2896 }
2897
2898
2899 sub error {
2900 my @w = @_;
2901 my $fh = $Global::original_stderr || *STDERR;
2902 my $prog = $Global::progname || "parallel";
2903 print $fh $prog, ": Error: ", @w;
2904 }
2905
2906
2907 sub die_bug {
2908 my $bugid = shift;
2909 print STDERR
2910 ("$Global::progname: This should not happen. You have found a bug.\n",
2911 "Please contact <parallel\@gnu.org> and include:\n",
2912 "* The version number: $Global::version\n",
2913 "* The bugid: $bugid\n",
2914 "* The command line being run\n",
2915 "* The files being read (put the files on a webserver if they are big)\n",
2916 "\n",
2917 "If you get the error on smaller/fewer files, please include those instead.\n");
2918 ::wait_and_exit(255);
2919 }
2920
2921 sub version {
2922 # Returns: N/A
2923 if($opt::tollef and not $opt::gnu) {
2924 print "WARNING: YOU ARE USING --tollef. IF THINGS ARE ACTING WEIRD USE --gnu.\n";
2925 }
2926 print join("\n",
2927 "GNU $Global::progname $Global::version",
2928 "Copyright (C) 2007,2008,2009,2010,2011,2012,2013,2014 Ole Tange and Free Software Foundation, Inc.",
2929 "License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>",
2930 "This is free software: you are free to change and redistribute it.",
2931 "GNU $Global::progname comes with no warranty.",
2932 "",
2933 "Web site: http://www.gnu.org/software/${Global::progname}\n",
2934 "When using programs that use GNU Parallel to process data for publication please cite:\n",
2935 "O. Tange (2011): GNU Parallel - The Command-Line Power Tool, ",
2936 ";login: The USENIX Magazine, February 2011:42-47.\n",
2937 "Or you can get GNU Parallel without this requirement by paying 10000 EUR.\n",
2938 );
2939 }
2940
2941 sub bibtex {
2942 # Returns: N/A
2943 if($opt::tollef and not $opt::gnu) {
2944 print "WARNING: YOU ARE USING --tollef. IF THINGS ARE ACTING WEIRD USE --gnu.\n";
2945 }
2946 print join("\n",
2947 "When using programs that use GNU Parallel to process data for publication please cite:",
2948 "",
2949 "\@article{Tange2011a,",
2950 " title = {GNU Parallel - The Command-Line Power Tool},",
2951 " author = {O. Tange},",
2952 " address = {Frederiksberg, Denmark},",
2953 " journal = {;login: The USENIX Magazine},",
2954 " month = {Feb},",
2955 " number = {1},",
2956 " volume = {36},",
2957 " url = {http://www.gnu.org/s/parallel},",
2958 " year = {2011},",
2959 " pages = {42-47}",
2960 "}",
2961 "",
2962 "(Feel free to use \\nocite{Tange2011a})",
2963 "",
2964 "This helps funding further development.",
2965 "",
2966 "Or you can get GNU Parallel without this requirement by paying 10000 EUR.",
2967 ""
2968 );
2969 while(not -e $ENV{'HOME'}."/.parallel/will-cite") {
2970 print "\nType: 'will cite' and press enter.\n> ";
2971 my $input = <STDIN>;
2972 if($input =~ /will cite/i) {
2973 mkdir $ENV{'HOME'}."/.parallel";
2974 open (my $fh, ">", $ENV{'HOME'}."/.parallel/will-cite")
2975 || ::die_bug("Cannot write: ".$ENV{'HOME'}."/.parallel/will-cite");
2976 close $fh;
2977 print "\nThank you for your support. It is much appreciated. The citation\n",
2978 "notice is now silenced.\n";
2979 }
2980 }
2981 }
2982
2983 sub show_limits {
2984 # Returns: N/A
2985 print("Maximal size of command: ",Limits::Command::real_max_length(),"\n",
2986 "Maximal used size of command: ",Limits::Command::max_length(),"\n",
2987 "\n",
2988 "Execution of will continue now, and it will try to read its input\n",
2989 "and run commands; if this is not what you wanted to happen, please\n",
2990 "press CTRL-D or CTRL-C\n");
2991 }
2992
2993 sub __GENERIC_COMMON_FUNCTION__ {}
2994
2995 sub uniq {
2996 # Remove duplicates and return unique values
2997 return keys %{{ map { $_ => 1 } @_ }};
2998 }
2999
3000 sub min {
3001 # Returns:
3002 # Minimum value of array
3003 my $min;
3004 for (@_) {
3005 # Skip undefs
3006 defined $_ or next;
3007 defined $min or do { $min = $_; next; }; # Set $_ to the first non-undef
3008 $min = ($min < $_) ? $min : $_;
3009 }
3010 return $min;
3011 }
3012
3013 sub max {
3014 # Returns:
3015 # Maximum value of array
3016 my $max;
3017 for (@_) {
3018 # Skip undefs
3019 defined $_ or next;
3020 defined $max or do { $max = $_; next; }; # Set $_ to the first non-undef
3021 $max = ($max > $_) ? $max : $_;
3022 }
3023 return $max;
3024 }
3025
3026 sub sum {
3027 # Returns:
3028 # Sum of values of array
3029 my @args = @_;
3030 my $sum = 0;
3031 for (@args) {
3032 # Skip undefs
3033 $_ and do { $sum += $_; }
3034 }
3035 return $sum;
3036 }
3037
3038 sub undef_as_zero {
3039 my $a = shift;
3040 return $a ? $a : 0;
3041 }
3042
3043 sub undef_as_empty {
3044 my $a = shift;
3045 return $a ? $a : "";
3046 }
3047
3048 {
3049 my $hostname;
3050 sub hostname {
3051 if(not $hostname) {
3052 $hostname = `hostname`;
3053 chomp($hostname);
3054 $hostname ||= "nohostname";
3055 }
3056 return $hostname;
3057 }
3058 }
3059
3060 sub which {
3061 # Input:
3062 # @programs = programs to find the path to
3063 # Returns:
3064 # @full_path = full paths to @programs. Nothing if not found
3065 my @which;
3066 for my $prg (@_) {
3067 push @which, map { $_."/".$prg } grep { -x $_."/".$prg } split(":",$ENV{'PATH'});
3068 }
3069 return @which;
3070 }
3071
3072 {
3073 my ($regexp,%fakename);
3074
3075 sub parent_shell {
3076 # Input:
3077 # $pid = pid to see if (grand)*parent is a shell
3078 # Returns:
3079 # $shellpath = path to shell - undef if no shell found
3080 my $pid = shift;
3081 if(not $regexp) {
3082 # All shells known to mankind
3083 #
3084 # ash bash csh dash fdsh fish fizsh ksh ksh93 mksh pdksh
3085 # posh rbash rush rzsh sash sh static-sh tcsh yash zsh
3086 my @shells = qw(ash bash csh dash fdsh fish fizsh ksh
3087 ksh93 mksh pdksh posh rbash rush rzsh
3088 sash sh static-sh tcsh yash zsh -sh -csh);
3089 # Can be formatted as:
3090 # [sh] -sh sh busybox sh
3091 # /bin/sh /sbin/sh /opt/csw/sh
3092 # NOT: foo.sh sshd crash flush pdflush scosh fsflush ssh
3093 my $shell = "(?:".join("|",@shells).")";
3094 $regexp = '^((\[)('. $shell. ')(\])|(|\S+/|busybox )('. $shell. '))($| )';
3095 %fakename = (
3096 # csh and tcsh disguise themselves as -sh/-csh
3097 "-sh" => ["csh", "tcsh"],
3098 "-csh" => ["tcsh", "csh"],
3099 );
3100 }
3101 my ($children_of_ref, $parent_of_ref, $name_of_ref) = pid_table();
3102 my $shellpath;
3103 my $testpid = $pid;
3104 while($testpid) {
3105 ::debug("init", "shell? ". $name_of_ref->{$testpid}."\n");
3106 if($name_of_ref->{$testpid} =~ /$regexp/o) {
3107 ::debug("init", "which ".($3||$6)." => ");
3108 $shellpath = (which($3 || $6,@{$fakename{$3 || $6}}))[0];
3109 ::debug("init", "shell path $shellpath\n");
3110 $shellpath and last;
3111 }
3112 $testpid = $parent_of_ref->{$testpid};
3113 }
3114 return $shellpath;
3115 }
3116 }
3117
3118 {
3119 my %pid_parentpid_cmd;
3120
3121 sub pid_table {
3122 # Returns:
3123 # %children_of = { pid -> children of pid }
3124 # %parent_of = { pid -> pid of parent }
3125 # %name_of = { pid -> commandname }
3126
3127 if(not %pid_parentpid_cmd) {
3128 # Filter for SysV-style `ps`
3129 my $sysv = q( ps -ef | perl -ane '1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;).
3130 q(s/^.{$s}//; print "@F[1,2] $_"' );
3131 # BSD-style `ps`
3132 my $bsd = q(ps -o pid,ppid,command -ax);
3133 %pid_parentpid_cmd =
3134 (
3135 'aix' => $sysv,
3136 'cygwin' => $sysv,
3137 'msys' => $sysv,
3138 'dec_osf' => $sysv,
3139 'darwin' => $bsd,
3140 'dragonfly' => $bsd,
3141 'freebsd' => $bsd,
3142 'gnu' => $sysv,
3143 'hpux' => $sysv,
3144 'linux' => $sysv,
3145 'mirbsd' => $bsd,
3146 'netbsd' => $bsd,
3147 'nto' => $sysv,
3148 'openbsd' => $bsd,
3149 'solaris' => $sysv,
3150 'svr5' => $sysv,
3151 );
3152 }
3153 $pid_parentpid_cmd{$^O} or ::die_bug("pid_parentpid_cmd for $^O missing");
3154
3155 my (@pidtable,%parent_of,%children_of,%name_of);
3156 # Table with pid -> children of pid
3157 @pidtable = `$pid_parentpid_cmd{$^O}`;
3158 my $p=$$;
3159 for (@pidtable) {
3160 # must match: 24436 21224 busybox ash
3161 /(\S+)\s+(\S+)\s+(\S+.*)/ or ::die_bug("pidtable format: $_");
3162 $parent_of{$1} = $2;
3163 push @{$children_of{$2}}, $1;
3164 $name_of{$1} = $3;
3165 }
3166 return(\%children_of, \%parent_of, \%name_of);
3167 }
3168 }
3169
3170 sub reap_usleep {
3171 # Reap dead children.
3172 # If no dead children: Sleep specified amount with exponential backoff
3173 # Input:
3174 # $ms = milliseconds to sleep
3175 # Returns:
3176 # $ms/2+0.001 if children reaped
3177 # $ms*1.1 if no children reaped
3178 my $ms = shift;
3179 if(reaper()) {
3180 # Sleep exponentially shorter (1/2^n) if a job finished
3181 return $ms/2+0.001;
3182 } else {
3183 if($opt::timeout) {
3184 $Global::timeoutq->process_timeouts();
3185 }
3186 usleep($ms);
3187 Job::exit_if_disk_full();
3188 if($opt::linebuffer) {
3189 for my $job (values %Global::running) {
3190 $job->print();
3191 }
3192 }
3193 # Sleep exponentially longer (1.1^n) if a job did not finish
3194 # though at most 1000 ms.
3195 return (($ms < 1000) ? ($ms * 1.1) : ($ms));
3196 }
3197 }
3198
3199 sub usleep {
3200 # Sleep this many milliseconds.
3201 # Input:
3202 # $ms = milliseconds to sleep
3203 my $ms = shift;
3204 ::debug(int($ms),"ms ");
3205 select(undef, undef, undef, $ms/1000);
3206 }
3207
3208 sub now {
3209 # Returns time since epoch as in seconds with 3 decimals
3210 # Uses:
3211 # @Global::use
3212 # Returns:
3213 # $time = time now with millisecond accuracy
3214 if(not $Global::use{"Time::HiRes"}) {
3215 if(eval "use Time::HiRes qw ( time );") {
3216 eval "sub TimeHiRestime { return Time::HiRes::time };";
3217 } else {
3218 eval "sub TimeHiRestime { return time() };";
3219 }
3220 $Global::use{"Time::HiRes"} = 1;
3221 }
3222
3223 return (int(TimeHiRestime()*1000))/1000;
3224 }
3225
3226 sub multiply_binary_prefix {
3227 # Evalualte numbers with binary prefix
3228 # Ki=2^10, Mi=2^20, Gi=2^30, Ti=2^40, Pi=2^50, Ei=2^70, Zi=2^80, Yi=2^80
3229 # ki=2^10, mi=2^20, gi=2^30, ti=2^40, pi=2^50, ei=2^70, zi=2^80, yi=2^80
3230 # K =2^10, M =2^20, G =2^30, T =2^40, P =2^50, E =2^70, Z =2^80, Y =2^80
3231 # k =10^3, m =10^6, g =10^9, t=10^12, p=10^15, e=10^18, z=10^21, y=10^24
3232 # 13G = 13*1024*1024*1024 = 13958643712
3233 # Input:
3234 # $s = string with prefixes
3235 # Returns:
3236 # $value = int with prefixes multiplied
3237 my $s = shift;
3238 $s =~ s/ki/*1024/gi;
3239 $s =~ s/mi/*1024*1024/gi;
3240 $s =~ s/gi/*1024*1024*1024/gi;
3241 $s =~ s/ti/*1024*1024*1024*1024/gi;
3242 $s =~ s/pi/*1024*1024*1024*1024*1024/gi;
3243 $s =~ s/ei/*1024*1024*1024*1024*1024*1024/gi;
3244 $s =~ s/zi/*1024*1024*1024*1024*1024*1024*1024/gi;
3245 $s =~ s/yi/*1024*1024*1024*1024*1024*1024*1024*1024/gi;
3246 $s =~ s/xi/*1024*1024*1024*1024*1024*1024*1024*1024*1024/gi;
3247
3248 $s =~ s/K/*1024/g;
3249 $s =~ s/M/*1024*1024/g;
3250 $s =~ s/G/*1024*1024*1024/g;
3251 $s =~ s/T/*1024*1024*1024*1024/g;
3252 $s =~ s/P/*1024*1024*1024*1024*1024/g;
3253 $s =~ s/E/*1024*1024*1024*1024*1024*1024/g;
3254 $s =~ s/Z/*1024*1024*1024*1024*1024*1024*1024/g;
3255 $s =~ s/Y/*1024*1024*1024*1024*1024*1024*1024*1024/g;
3256 $s =~ s/X/*1024*1024*1024*1024*1024*1024*1024*1024*1024/g;
3257
3258 $s =~ s/k/*1000/g;
3259 $s =~ s/m/*1000*1000/g;
3260 $s =~ s/g/*1000*1000*1000/g;
3261 $s =~ s/t/*1000*1000*1000*1000/g;
3262 $s =~ s/p/*1000*1000*1000*1000*1000/g;
3263 $s =~ s/e/*1000*1000*1000*1000*1000*1000/g;
3264 $s =~ s/z/*1000*1000*1000*1000*1000*1000*1000/g;
3265 $s =~ s/y/*1000*1000*1000*1000*1000*1000*1000*1000/g;
3266 $s =~ s/x/*1000*1000*1000*1000*1000*1000*1000*1000*1000/g;
3267
3268 $s = eval $s;
3269 ::debug($s);
3270 return $s;
3271 }
3272
3273 sub tmpfile {
3274 # Create tempfile as $TMPDIR/parXXXXX
3275 # Returns:
3276 # $filename = file name created
3277 return ::tempfile(DIR=>$ENV{'TMPDIR'}, TEMPLATE => 'parXXXXX', @_);
3278 }
3279
3280 sub __DEBUGGING__ {}
3281
3282 sub debug {
3283 # Uses:
3284 # $Global::debug
3285 # %Global::fd
3286 # Returns: N/A
3287 $Global::debug or return;
3288 @_ = grep { defined $_ ? $_ : "" } @_;
3289 if($Global::debug eq "all" or $Global::debug eq $_[0]) {
3290 if($Global::fd{1}) {
3291 # Original stdout was saved
3292 my $stdout = $Global::fd{1};
3293 print $stdout @_[1..$#_];
3294 } else {
3295 print @_[1..$#_];
3296 }
3297 }
3298 }
3299
3300 sub my_memory_usage {
3301 # Returns:
3302 # memory usage if found
3303 # 0 otherwise
3304 use strict;
3305 use FileHandle;
3306
3307 my $pid = $$;
3308 if(-e "/proc/$pid/stat") {
3309 my $fh = FileHandle->new("</proc/$pid/stat");
3310
3311 my $data = <$fh>;
3312 chomp $data;
3313 $fh->close;
3314
3315 my @procinfo = split(/\s+/,$data);
3316
3317 return undef_as_zero($procinfo[22]);
3318 } else {
3319 return 0;
3320 }
3321 }
3322
3323 sub my_size {
3324 # Returns:
3325 # $size = size of object if Devel::Size is installed
3326 # -1 otherwise
3327 my @size_this = (@_);
3328 eval "use Devel::Size qw(size total_size)";
3329 if ($@) {
3330 return -1;
3331 } else {
3332 return total_size(@_);
3333 }
3334 }
3335
3336 sub my_dump {
3337 # Returns:
3338 # ascii expression of object if Data::Dump(er) is installed
3339 # error code otherwise
3340 my @dump_this = (@_);
3341 eval "use Data::Dump qw(dump);";
3342 if ($@) {
3343 # Data::Dump not installed
3344 eval "use Data::Dumper;";
3345 if ($@) {
3346 my $err = "Neither Data::Dump nor Data::Dumper is installed\n".
3347 "Not dumping output\n";
3348 print $Global::original_stderr $err;
3349 return $err;
3350 } else {
3351 return Dumper(@dump_this);
3352 }
3353 } else {
3354 # Create a dummy Data::Dump:dump as Hans Schou sometimes has
3355 # it undefined
3356 eval "sub Data::Dump:dump {}";
3357 eval "use Data::Dump qw(dump);";
3358 return (Data::Dump::dump(@dump_this));
3359 }
3360 }
3361
3362 sub my_croak {
3363 eval "use Carp; 1";
3364 $Carp::Verbose = 1;
3365 croak(@_);
3366 }
3367
3368 sub my_carp {
3369 eval "use Carp; 1";
3370 $Carp::Verbose = 1;
3371 carp(@_);
3372 }
3373
3374 sub __OBJECT_ORIENTED_PARTS__ {}
3375
3376 package SSHLogin;
3377
3378 sub new {
3379 my $class = shift;
3380 my $sshlogin_string = shift;
3381 my $ncpus;
3382 my %hostgroups;
3383 # SSHLogins can have these formats:
3384 # @grp+grp/ncpu//usr/bin/ssh user@server
3385 # ncpu//usr/bin/ssh user@server
3386 # /usr/bin/ssh user@server
3387 # user@server
3388 # ncpu/user@server
3389 # @grp+grp/user@server
3390 if($sshlogin_string =~ s:^\@([^/]+)/?::) {
3391 # Look for SSHLogin hostgroups
3392 %hostgroups = map { $_ => 1 } split(/\+/, $1);
3393 }
3394 if ($sshlogin_string =~ s:^(\d+)/::) {
3395 # Override default autodetected ncpus unless missing
3396 $ncpus = $1;
3397 }
3398 my $string = $sshlogin_string;
3399 # An SSHLogin is always in the hostgroup of its $string-name
3400 $hostgroups{$string} = 1;
3401 @Global::hostgroups{keys %hostgroups} = values %hostgroups;
3402 my @unget = ();
3403 my $no_slash_string = $string;
3404 $no_slash_string =~ s/[^-a-z0-9:]/_/gi;
3405 return bless {
3406 'string' => $string,
3407 'jobs_running' => 0,
3408 'jobs_completed' => 0,
3409 'maxlength' => undef,
3410 'max_jobs_running' => undef,
3411 'orig_max_jobs_running' => undef,
3412 'ncpus' => $ncpus,
3413 'hostgroups' => \%hostgroups,
3414 'sshcommand' => undef,
3415 'serverlogin' => undef,
3416 'control_path_dir' => undef,
3417 'control_path' => undef,
3418 'time_to_login' => undef,
3419 'last_login_at' => undef,
3420 'loadavg_file' => $ENV{'HOME'} . "/.parallel/tmp/loadavg-" .
3421 $no_slash_string,
3422 'loadavg' => undef,
3423 'last_loadavg_update' => 0,
3424 'swap_activity_file' => $ENV{'HOME'} . "/.parallel/tmp/swap_activity-" .
3425 $no_slash_string,
3426 'swap_activity' => undef,
3427 }, ref($class) || $class;
3428 }
3429
3430 sub DESTROY {
3431 my $self = shift;
3432 # Remove temporary files if they are created.
3433 unlink $self->{'loadavg_file'};
3434 unlink $self->{'swap_activity_file'};
3435 }
3436
3437 sub string {
3438 my $self = shift;
3439 return $self->{'string'};
3440 }
3441
3442 sub jobs_running {
3443 my $self = shift;
3444
3445 return ($self->{'jobs_running'} || "0");
3446 }
3447
3448 sub inc_jobs_running {
3449 my $self = shift;
3450 $self->{'jobs_running'}++;
3451 }
3452
3453 sub dec_jobs_running {
3454 my $self = shift;
3455 $self->{'jobs_running'}--;
3456 }
3457
3458 sub set_maxlength {
3459 my $self = shift;
3460 $self->{'maxlength'} = shift;
3461 }
3462
3463 sub maxlength {
3464 my $self = shift;
3465 return $self->{'maxlength'};
3466 }
3467
3468 sub jobs_completed {
3469 my $self = shift;
3470 return $self->{'jobs_completed'};
3471 }
3472
3473 sub in_hostgroups {
3474 # Input:
3475 # @hostgroups = the hostgroups to look for
3476 # Returns:
3477 # true if intersection of @hostgroups and the hostgroups of this
3478 # SSHLogin is non-empty
3479 my $self = shift;
3480 return grep { defined $self->{'hostgroups'}{$_} } @_;
3481 }
3482
3483 sub hostgroups {
3484 my $self = shift;
3485 return keys %{$self->{'hostgroups'}};
3486 }
3487
3488 sub inc_jobs_completed {
3489 my $self = shift;
3490 $self->{'jobs_completed'}++;
3491 }
3492
3493 sub set_max_jobs_running {
3494 my $self = shift;
3495 if(defined $self->{'max_jobs_running'}) {
3496 $Global::max_jobs_running -= $self->{'max_jobs_running'};
3497 }
3498 $self->{'max_jobs_running'} = shift;
3499 if(defined $self->{'max_jobs_running'}) {
3500 # max_jobs_running could be resat if -j is a changed file
3501 $Global::max_jobs_running += $self->{'max_jobs_running'};
3502 }
3503 # Initialize orig to the first non-zero value that comes around
3504 $self->{'orig_max_jobs_running'} ||= $self->{'max_jobs_running'};
3505 }
3506
3507 sub swapping {
3508 my $self = shift;
3509 my $swapping = $self->swap_activity();
3510 return (not defined $swapping or $swapping)
3511 }
3512
3513 sub swap_activity {
3514 # If the currently known swap activity is too old:
3515 # Recompute a new one in the background
3516 # Returns:
3517 # last swap activity computed
3518 my $self = shift;
3519 # Should we update the swap_activity file?
3520 my $update_swap_activity_file = 0;
3521 if(-r $self->{'swap_activity_file'}) {
3522 open(my $swap_fh, "<", $self->{'swap_activity_file'}) || ::die_bug("swap_activity_file-r");
3523 my $swap_out = <$swap_fh>;
3524 close $swap_fh;
3525 if($swap_out =~ /^(\d+)$/) {
3526 $self->{'swap_activity'} = $1;
3527 ::debug("swap", "New swap_activity: ", $self->{'swap_activity'});
3528 }
3529 ::debug("swap", "Last update: ", $self->{'last_swap_activity_update'});
3530 if(time - $self->{'last_swap_activity_update'} > 10) {
3531 # last swap activity update was started 10 seconds ago
3532 ::debug("swap", "Older than 10 sec: ", $self->{'swap_activity_file'});
3533 $update_swap_activity_file = 1;
3534 }
3535 } else {
3536 ::debug("swap", "No swap_activity file: ", $self->{'swap_activity_file'});
3537 $self->{'swap_activity'} = undef;
3538 $update_swap_activity_file = 1;
3539 }
3540 if($update_swap_activity_file) {
3541 ::debug("swap", "Updating swap_activity file ", $self->{'swap_activity_file'});
3542 $self->{'last_swap_activity_update'} = time;
3543 -e $ENV{'HOME'}."/.parallel" or mkdir $ENV{'HOME'}."/.parallel";
3544 -e $ENV{'HOME'}."/.parallel/tmp" or mkdir $ENV{'HOME'}."/.parallel/tmp";
3545 my $swap_activity;
3546 $swap_activity = swapactivityscript();
3547 if($self->{'string'} ne ":") {
3548 $swap_activity = $self->sshcommand() . " " . $self->serverlogin() . " " .
3549 ::shell_quote_scalar($swap_activity);
3550 }
3551 # Run swap_activity measuring.
3552 # As the command can take long to run if run remote
3553 # save it to a tmp file before moving it to the correct file
3554 my $file = $self->{'swap_activity_file'};
3555 my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".swp");
3556 ::debug("swap", "\n", $swap_activity, "\n");
3557 qx{ ($swap_activity > $tmpfile && mv $tmpfile $file || rm $tmpfile) & };
3558 }
3559 return $self->{'swap_activity'};
3560 }
3561
3562 {
3563 my $script;
3564
3565 sub swapactivityscript {
3566 # Returns:
3567 # shellscript for detecting swap activity
3568 #
3569 # arguments for vmstat are OS dependant
3570 # swap_in and swap_out are in different columns depending on OS
3571 #
3572 if(not $script) {
3573 my %vmstat = (
3574 # linux: $7*$8
3575 # $ vmstat 1 2
3576 # procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu----
3577 # r b swpd free buff cache si so bi bo in cs us sy id wa
3578 # 5 0 51208 1701096 198012 18857888 0 0 37 153 28 19 56 11 33 1
3579 # 3 0 51208 1701288 198012 18857972 0 0 0 0 3638 10412 15 3 82 0
3580 'linux' => ['vmstat 1 2 | tail -n1', '$7*$8'],
3581
3582 # solaris: $6*$7
3583 # $ vmstat -S 1 2
3584 # kthr memory page disk faults cpu
3585 # r b w swap free si so pi po fr de sr s3 s4 -- -- in sy cs us sy id
3586 # 0 0 0 4628952 3208408 0 0 3 1 1 0 0 -0 2 0 0 263 613 246 1 2 97
3587 # 0 0 0 4552504 3166360 0 0 0 0 0 0 0 0 0 0 0 246 213 240 1 1 98
3588 'solaris' => ['vmstat -S 1 2 | tail -1', '$6*$7'],
3589
3590 # darwin (macosx): $21*$22
3591 # $ vm_stat -c 2 1
3592 # Mach Virtual Memory Statistics: (page size of 4096 bytes)
3593 # free active specul inactive throttle wired prgable faults copy 0fill reactive purged file-backed anonymous cmprssed cmprssor dcomprs comprs pageins pageout swapins swapouts
3594 # 346306 829050 74871 606027 0 240231 90367 544858K 62343596 270837K 14178 415070 570102 939846 356 370 116 922 4019813 4 0 0
3595 # 345740 830383 74875 606031 0 239234 90369 2696 359 553 0 0 570110 941179 356 370 0 0 0 0 0 0
3596 'darwin' => ['vm_stat -c 2 1 | tail -n1', '$21*$22'],
3597
3598 # ultrix: $12*$13
3599 # $ vmstat -S 1 2
3600 # procs faults cpu memory page disk
3601 # r b w in sy cs us sy id avm fre si so pi po fr de sr s0
3602 # 1 0 0 4 23 2 3 0 97 7743 217k 0 0 0 0 0 0 0 0
3603 # 1 0 0 6 40 8 0 1 99 7743 217k 0 0 3 0 0 0 0 0
3604 'ultrix' => ['vmstat -S 1 2 | tail -1', '$12*$13'],
3605
3606 # aix: $6*$7
3607 # $ vmstat 1 2
3608 # System configuration: lcpu=1 mem=2048MB
3609 #
3610 # kthr memory page faults cpu
3611 # ----- ----------- ------------------------ ------------ -----------
3612 # r b avm fre re pi po fr sr cy in sy cs us sy id wa
3613 # 0 0 333933 241803 0 0 0 0 0 0 10 143 90 0 0 99 0
3614 # 0 0 334125 241569 0 0 0 0 0 0 37 5368 184 0 9 86 5
3615 'aix' => ['vmstat 1 2 | tail -n1', '$6*$7'],
3616
3617 # freebsd: $8*$9
3618 # $ vmstat -H 1 2
3619 # procs memory page disks faults cpu
3620 # r b w avm fre flt re pi po fr sr ad0 ad1 in sy cs us sy id
3621 # 1 0 0 596716 19560 32 0 0 0 33 8 0 0 11 220 277 0 0 99
3622 # 0 0 0 596716 19560 2 0 0 0 0 0 0 0 11 144 263 0 1 99
3623 'freebsd' => ['vmstat -H 1 2 | tail -n1', '$8*$9'],
3624
3625 # mirbsd: $8*$9
3626 # $ vmstat 1 2
3627 # procs memory page disks traps cpu
3628 # r b w avm fre flt re pi po fr sr wd0 cd0 int sys cs us sy id
3629 # 0 0 0 25776 164968 34 0 0 0 0 0 0 0 230 259 38 4 0 96
3630 # 0 0 0 25776 164968 24 0 0 0 0 0 0 0 237 275 37 0 0 100
3631 'mirbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'],
3632
3633 # netbsd: $7*$8
3634 # $ vmstat 1 2
3635 # procs memory page disks faults cpu
3636 # r b avm fre flt re pi po fr sr w0 w1 in sy cs us sy id
3637 # 0 0 138452 6012 54 0 0 0 1 2 3 0 4 100 23 0 0 100
3638 # 0 0 138456 6008 1 0 0 0 0 0 0 0 7 26 19 0 0 100
3639 'netbsd' => ['vmstat 1 2 | tail -n1', '$7*$8'],
3640
3641 # openbsd: $8*$9
3642 # $ vmstat 1 2
3643 # procs memory page disks traps cpu
3644 # r b w avm fre flt re pi po fr sr wd0 wd1 int sys cs us sy id
3645 # 0 0 0 76596 109944 73 0 0 0 0 0 0 1 5 259 22 0 1 99
3646 # 0 0 0 76604 109936 24 0 0 0 0 0 0 0 7 114 20 0 1 99
3647 'openbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'],
3648
3649 # hpux: $8*$9
3650 # $ vmstat 1 2
3651 # procs memory page faults cpu
3652 # r b w avm free re at pi po fr de sr in sy cs us sy id
3653 # 1 0 0 247211 216476 4 1 0 0 0 0 0 102 73005 54 6 11 83
3654 # 1 0 0 247211 216421 43 9 0 0 0 0 0 144 1675 96 25269512791222387000 25269512791222387000 105
3655 'hpux' => ['vmstat 1 2 | tail -n1', '$8*$9'],
3656
3657 # dec_osf (tru64): $11*$12
3658 # $ vmstat 1 2
3659 # Virtual Memory Statistics: (pagesize = 8192)
3660 # procs memory pages intr cpu
3661 # r w u act free wire fault cow zero react pin pout in sy cs us sy id
3662 # 3 181 36 51K 1895 8696 348M 59M 122M 259 79M 0 5 218 302 4 1 94
3663 # 3 181 36 51K 1893 8696 3 15 21 0 28 0 4 81 321 1 1 98
3664 'dec_osf' => ['vmstat 1 2 | tail -n1', '$11*$12'],
3665
3666 # gnu (hurd): $7*$8
3667 # $ vmstat -k 1 2
3668 # (pagesize: 4, size: 512288, swap size: 894972)
3669 # free actv inact wired zeroed react pgins pgouts pfaults cowpfs hrat caobj cache swfree
3670 # 371940 30844 89228 20276 298348 0 48192 19016 756105 99808 98% 876 20628 894972
3671 # 371940 30844 89228 20276 +0 +0 +0 +0 +42 +2 98% 876 20628 894972
3672 'gnu' => ['vmstat -k 1 2 | tail -n1', '$7*$8'],
3673
3674 # -nto (qnx has no swap)
3675 #-irix
3676 #-svr5 (scosysv)
3677 );
3678 my $perlscript = "";
3679 for my $os (keys %vmstat) {
3680 #q[ { vmstat 1 2 2> /dev/null || vmstat -c 1 2; } | ].
3681 # q[ awk 'NR!=4{next} NF==17||NF==16{print $7*$8} NF==22{print $21*$22} {exit}' ];
3682 $vmstat{$os}[1] =~ s/\$/\\\\\\\$/g; # $ => \\\$
3683 $perlscript .= 'if($^O eq "'.$os.'") { print `'.$vmstat{$os}[0].' | awk "{print ' .
3684 $vmstat{$os}[1] . '}"` }';
3685 }
3686 $perlscript = "perl -e " . ::shell_quote_scalar($perlscript);
3687 $script = $Global::envvar. " " .$perlscript;
3688 }
3689 return $script;
3690 }
3691 }
3692
3693 sub too_fast_remote_login {
3694 my $self = shift;
3695 if($self->{'last_login_at'} and $self->{'time_to_login'}) {
3696 # sshd normally allows 10 simultaneous logins
3697 # A login takes time_to_login
3698 # So time_to_login/5 should be safe
3699 # If now <= last_login + time_to_login/5: Then it is too soon.
3700 my $too_fast = (::now() <= $self->{'last_login_at'}
3701 + $self->{'time_to_login'}/5);
3702 ::debug("run", "Too fast? $too_fast ");
3703 return $too_fast;
3704 } else {
3705 # No logins so far (or time_to_login not computed): it is not too fast
3706 return 0;
3707 }
3708 }
3709
3710 sub last_login_at {
3711 my $self = shift;
3712 return $self->{'last_login_at'};
3713 }
3714
3715 sub set_last_login_at {
3716 my $self = shift;
3717 $self->{'last_login_at'} = shift;
3718 }
3719
3720 sub loadavg_too_high {
3721 my $self = shift;
3722 my $loadavg = $self->loadavg();
3723 return (not defined $loadavg or
3724 $loadavg > $self->max_loadavg());
3725 }
3726
3727 sub loadavg {
3728 # If the currently know loadavg is too old:
3729 # Recompute a new one in the background
3730 # The load average is computed as the number of processes waiting for disk
3731 # or CPU right now. So it is the server load this instant and not averaged over
3732 # several minutes. This is needed so GNU Parallel will at most start one job
3733 # that will push the load over the limit.
3734 #
3735 # Returns:
3736 # $last_loadavg = last load average computed (undef if none)
3737 my $self = shift;
3738 # Should we update the loadavg file?
3739 my $update_loadavg_file = 0;
3740 if(open(my $load_fh, "<", $self->{'loadavg_file'})) {
3741 local $/ = undef;
3742 my $load_out = <$load_fh>;
3743 close $load_fh;
3744 my $load =()= ($load_out=~/(^[DR]....[^\[])/gm);
3745 if($load > 0) {
3746 # load is overestimated by 1
3747 $self->{'loadavg'} = $load - 1;
3748 ::debug("load", "New loadavg: ", $self->{'loadavg'});
3749 } else {
3750 ::die_bug("loadavg_invalid_content: $load_out");
3751 }
3752 ::debug("load", "Last update: ", $self->{'last_loadavg_update'});
3753 if(time - $self->{'last_loadavg_update'} > 10) {
3754 # last loadavg was started 10 seconds ago
3755 ::debug("load", time - $self->{'last_loadavg_update'}, " secs old: ",
3756 $self->{'loadavg_file'});
3757 $update_loadavg_file = 1;
3758 }
3759 } else {
3760 ::debug("load", "No loadavg file: ", $self->{'loadavg_file'});
3761 $self->{'loadavg'} = undef;
3762 $update_loadavg_file = 1;
3763 }
3764 if($update_loadavg_file) {
3765 ::debug("load", "Updating loadavg file", $self->{'loadavg_file'}, "\n");
3766 $self->{'last_loadavg_update'} = time;
3767 -e $ENV{'HOME'}."/.parallel" or mkdir $ENV{'HOME'}."/.parallel";
3768 -e $ENV{'HOME'}."/.parallel/tmp" or mkdir $ENV{'HOME'}."/.parallel/tmp";
3769 my $cmd = "";
3770 if($self->{'string'} ne ":") {
3771 $cmd = $self->sshcommand() . " " . $self->serverlogin() . " ";
3772 }
3773 # TODO Is is called 'ps ax -o state,command' on other platforms?
3774 $cmd .= "ps ax -o state,command";
3775 # As the command can take long to run if run remote
3776 # save it to a tmp file before moving it to the correct file
3777 my $file = $self->{'loadavg_file'};
3778 my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".loa");
3779 qx{ ($cmd > $tmpfile && mv $tmpfile $file || rm $tmpfile) & };
3780 }
3781 return $self->{'loadavg'};
3782 }
3783
3784 sub max_loadavg {
3785 my $self = shift;
3786 # If --load is a file it might be changed
3787 if($Global::max_load_file) {
3788 my $mtime = (stat($Global::max_load_file))[9];
3789 if($mtime > $Global::max_load_file_last_mod) {
3790 $Global::max_load_file_last_mod = $mtime;
3791 for my $sshlogin (values %Global::host) {
3792 $sshlogin->set_max_loadavg(undef);
3793 }
3794 }
3795 }
3796 if(not defined $self->{'max_loadavg'}) {
3797 $self->{'max_loadavg'} =
3798 $self->compute_max_loadavg($opt::load);
3799 }
3800 ::debug("load", "max_loadavg: ", $self->string(), " ", $self->{'max_loadavg'});
3801 return $self->{'max_loadavg'};
3802 }
3803
3804 sub set_max_loadavg {
3805 my $self = shift;
3806 $self->{'max_loadavg'} = shift;
3807 }
3808
3809 sub compute_max_loadavg {
3810 # Parse the max loadaverage that the user asked for using --load
3811 # Returns:
3812 # max loadaverage
3813 my $self = shift;
3814 my $loadspec = shift;
3815 my $load;
3816 if(defined $loadspec) {
3817 if($loadspec =~ /^\+(\d+)$/) {
3818 # E.g. --load +2
3819 my $j = $1;
3820 $load =
3821 $self->ncpus() + $j;
3822 } elsif ($loadspec =~ /^-(\d+)$/) {
3823 # E.g. --load -2
3824 my $j = $1;
3825 $load =
3826 $self->ncpus() - $j;
3827 } elsif ($loadspec =~ /^(\d+)\%$/) {
3828 my $j = $1;
3829 $load =
3830 $self->ncpus() * $j / 100;
3831 } elsif ($loadspec =~ /^(\d+(\.\d+)?)$/) {
3832 $load = $1;
3833 } elsif (-f $loadspec) {
3834 $Global::max_load_file = $loadspec;
3835 $Global::max_load_file_last_mod = (stat($Global::max_load_file))[9];
3836 if(open(my $in_fh, "<", $Global::max_load_file)) {
3837 my $opt_load_file = join("",<$in_fh>);
3838 close $in_fh;
3839 $load = $self->compute_max_loadavg($opt_load_file);
3840 } else {
3841 print $Global::original_stderr "Cannot open $loadspec\n";
3842 ::wait_and_exit(255);
3843 }
3844 } else {
3845 print $Global::original_stderr "Parsing of --load failed\n";
3846 ::die_usage();
3847 }
3848 if($load < 0.01) {
3849 $load = 0.01;
3850 }
3851 }
3852 return $load;
3853 }
3854
3855 sub time_to_login {
3856 my $self = shift;
3857 return $self->{'time_to_login'};
3858 }
3859
3860 sub set_time_to_login {
3861 my $self = shift;
3862 $self->{'time_to_login'} = shift;
3863 }
3864
3865 sub max_jobs_running {
3866 my $self = shift;
3867 if(not defined $self->{'max_jobs_running'}) {
3868 my $nproc = $self->compute_number_of_processes($opt::jobs);
3869 $self->set_max_jobs_running($nproc);
3870 }
3871 return $self->{'max_jobs_running'};
3872 }
3873
3874 sub orig_max_jobs_running {
3875 my $self = shift;
3876 return $self->{'orig_max_jobs_running'};
3877 }
3878
3879 sub compute_number_of_processes {
3880 # Number of processes wanted and limited by system resources
3881 # Returns:
3882 # Number of processes
3883 my $self = shift;
3884 my $opt_P = shift;
3885 my $wanted_processes = $self->user_requested_processes($opt_P);
3886 if(not defined $wanted_processes) {
3887 $wanted_processes = $Global::default_simultaneous_sshlogins;
3888 }
3889 ::debug("load", "Wanted procs: $wanted_processes\n");
3890 my $system_limit =
3891 $self->processes_available_by_system_limit($wanted_processes);
3892 ::debug("load", "Limited to procs: $system_limit\n");
3893 return $system_limit;
3894 }
3895
3896 sub processes_available_by_system_limit {
3897 # If the wanted number of processes is bigger than the system limits:
3898 # Limit them to the system limits
3899 # Limits are: File handles, number of input lines, processes,
3900 # and taking > 1 second to spawn 10 extra processes
3901 # Returns:
3902 # Number of processes
3903 my $self = shift;
3904 my $wanted_processes = shift;
3905
3906 my $system_limit = 0;
3907 my @jobs = ();
3908 my $job;
3909 my @args = ();
3910 my $arg;
3911 my $more_filehandles = 1;
3912 my $max_system_proc_reached = 0;
3913 my $slow_spawining_warning_printed = 0;
3914 my $time = time;
3915 my %fh;
3916 my @children;
3917
3918 # Reserve filehandles
3919 # perl uses 7 filehandles for something?
3920 # parallel uses 1 for memory_usage
3921 # parallel uses 4 for ?
3922 for my $i (1..12) {
3923 open($fh{"init-$i"}, "<", "/dev/null");
3924 }
3925
3926 for(1..2) {
3927 # System process limit
3928 my $child;
3929 if($child = fork()) {
3930 push (@children,$child);
3931 $Global::unkilled_children{$child} = 1;
3932 } elsif(defined $child) {
3933 # The child takes one process slot
3934 # It will be killed later
3935 $SIG{TERM} = $Global::original_sig{TERM};
3936 sleep 10000000;
3937 exit(0);
3938 } else {
3939 $max_system_proc_reached = 1;
3940 }
3941 }
3942 my $count_jobs_already_read = $Global::JobQueue->next_seq();
3943 my $wait_time_for_getting_args = 0;
3944 my $start_time = time;
3945 while(1) {
3946 $system_limit >= $wanted_processes and last;
3947 not $more_filehandles and last;
3948 $max_system_proc_reached and last;
3949 my $before_getting_arg = time;
3950 if($Global::semaphore or $opt::pipe) {
3951 # Skip: No need to get args
3952 } elsif(defined $opt::retries and $count_jobs_already_read) {
3953 # For retries we may need to run all jobs on this sshlogin
3954 # so include the already read jobs for this sshlogin
3955 $count_jobs_already_read--;
3956 } else {
3957 if($opt::X or $opt::m) {
3958 # The arguments may have to be re-spread over several jobslots
3959 # So pessimistically only read one arg per jobslot
3960 # instead of a full commandline
3961 if($Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->empty()) {
3962 if($Global::JobQueue->empty()) {
3963 last;
3964 } else {
3965 ($job) = $Global::JobQueue->get();
3966 push(@jobs, $job);
3967 }
3968 } else {
3969 ($arg) = $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get();
3970 push(@args, $arg);
3971 }
3972 } else {
3973 # If there are no more command lines, then we have a process
3974 # per command line, so no need to go further
3975 $Global::JobQueue->empty() and last;
3976 ($job) = $Global::JobQueue->get();
3977 push(@jobs, $job);
3978 }
3979 }
3980 $wait_time_for_getting_args += time - $before_getting_arg;
3981 $system_limit++;
3982
3983 # Every simultaneous process uses 2 filehandles when grouping
3984 # Every simultaneous process uses 2 filehandles when compressing
3985 $more_filehandles = open($fh{$system_limit*10}, "<", "/dev/null")
3986 && open($fh{$system_limit*10+2}, "<", "/dev/null")
3987 && open($fh{$system_limit*10+3}, "<", "/dev/null")
3988 && open($fh{$system_limit*10+4}, "<", "/dev/null");
3989
3990 # System process limit
3991 my $child;
3992 if($child = fork()) {
3993 push (@children,$child);
3994 $Global::unkilled_children{$child} = 1;
3995 } elsif(defined $child) {
3996 # The child takes one process slot
3997 # It will be killed later
3998 $SIG{TERM} = $Global::original_sig{TERM};
3999 sleep 10000000;
4000 exit(0);
4001 } else {
4002 $max_system_proc_reached = 1;
4003 }
4004 my $forktime = time - $time - $wait_time_for_getting_args;
4005 ::debug("run", "Time to fork $system_limit procs: $wait_time_for_getting_args ",
4006 $forktime,
4007 " (processes so far: ", $system_limit,")\n");
4008 if($system_limit > 10 and
4009 $forktime > 1 and
4010 $forktime > $system_limit * 0.01
4011 and not $slow_spawining_warning_printed) {
4012 # It took more than 0.01 second to fork a processes on avg.
4013 # Give the user a warning. He can press Ctrl-C if this
4014 # sucks.
4015 print $Global::original_stderr
4016 ("parallel: Warning: Starting $system_limit processes took > $forktime sec.\n",
4017 "Consider adjusting -j. Press CTRL-C to stop.\n");
4018 $slow_spawining_warning_printed = 1;
4019 }
4020 }
4021 # Cleanup: Close the files
4022 for (values %fh) { close $_ }
4023 # Cleanup: Kill the children
4024 for my $pid (@children) {
4025 kill 9, $pid;
4026 waitpid($pid,0);
4027 delete $Global::unkilled_children{$pid};
4028 }
4029 # Cleanup: Unget the command_lines or the @args
4030 $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->unget(@args);
4031 $Global::JobQueue->unget(@jobs);
4032 if($system_limit < $wanted_processes) {
4033 # The system_limit is less than the wanted_processes
4034 if($system_limit < 1 and not $Global::JobQueue->empty()) {
4035 ::warning("Cannot spawn any jobs. Raising ulimit -u or /etc/security/limits.conf\n",
4036 "or /proc/sys/kernel/pid_max may help.\n");
4037 ::wait_and_exit(255);
4038 }
4039 if(not $more_filehandles) {
4040 ::warning("Only enough file handles to run ", $system_limit, " jobs in parallel.\n",
4041 "Running 'parallel -j0 -N", $system_limit, " --pipe parallel -j0' or ",
4042 "raising ulimit -n or /etc/security/limits.conf may help.\n");
4043 }
4044 if($max_system_proc_reached) {
4045 ::warning("Only enough available processes to run ", $system_limit,
4046 " jobs in parallel. Raising ulimit -u or /etc/security/limits.conf\n",
4047 "or /proc/sys/kernel/pid_max may help.\n");
4048 }
4049 }
4050 if($] == 5.008008 and $system_limit > 1000) {
4051 # https://savannah.gnu.org/bugs/?36942
4052 $system_limit = 1000;
4053 }
4054 if($Global::JobQueue->empty()) {
4055 $system_limit ||= 1;
4056 }
4057 if($self->string() ne ":" and
4058 $system_limit > $Global::default_simultaneous_sshlogins) {
4059 $system_limit =
4060 $self->simultaneous_sshlogin_limit($system_limit);
4061 }
4062 return $system_limit;
4063 }
4064
4065 sub simultaneous_sshlogin_limit {
4066 # Test by logging in wanted number of times simultaneously
4067 # Returns:
4068 # min($wanted_processes,$working_simultaneous_ssh_logins-1)
4069 my $self = shift;
4070 my $wanted_processes = shift;
4071 if($self->{'time_to_login'}) {
4072 return $wanted_processes;
4073 }
4074
4075 # Try twice because it guesses wrong sometimes
4076 # Choose the minimal
4077 my $ssh_limit =
4078 ::min($self->simultaneous_sshlogin($wanted_processes),
4079 $self->simultaneous_sshlogin($wanted_processes));
4080 if($ssh_limit < $wanted_processes) {
4081 my $serverlogin = $self->serverlogin();
4082 ::warning("ssh to $serverlogin only allows ",
4083 "for $ssh_limit simultaneous logins.\n",
4084 "You may raise this by changing ",
4085 "/etc/ssh/sshd_config:MaxStartups and MaxSessions on $serverlogin.\n",
4086 "Using only ",$ssh_limit-1," connections ",
4087 "to avoid race conditions.\n");
4088 }
4089 # Race condition can cause problem if using all sshs.
4090 if($ssh_limit > 1) { $ssh_limit -= 1; }
4091 return $ssh_limit;
4092 }
4093
4094 sub simultaneous_sshlogin {
4095 # Using $sshlogin try to see if we can do $wanted_processes
4096 # simultaneous logins
4097 # (ssh host echo simultaneouslogin & ssh host echo simultaneouslogin & ...)|grep simul|wc -l
4098 # Returns:
4099 # Number of succesful logins
4100 my $self = shift;
4101 my $wanted_processes = shift;
4102 my $sshcmd = $self->sshcommand();
4103 my $serverlogin = $self->serverlogin();
4104 my $sshdelay = $opt::sshdelay ? "sleep $opt::sshdelay;" : "";
4105 my $cmd = "$sshdelay$sshcmd $serverlogin echo simultaneouslogin </dev/null 2>&1 &"x$wanted_processes;
4106 ::debug("init", "Trying $wanted_processes logins at $serverlogin\n");
4107 open (my $simul_fh, "-|", "($cmd)|grep simultaneouslogin | wc -l") or
4108 ::die_bug("simultaneouslogin");
4109 my $ssh_limit = <$simul_fh>;
4110 close $simul_fh;
4111 chomp $ssh_limit;
4112 return $ssh_limit;
4113 }
4114
4115 sub set_ncpus {
4116 my $self = shift;
4117 $self->{'ncpus'} = shift;
4118 }
4119
4120 sub user_requested_processes {
4121 # Parse the number of processes that the user asked for using -j
4122 # Returns:
4123 # the number of processes to run on this sshlogin
4124 my $self = shift;
4125 my $opt_P = shift;
4126 my $processes;
4127 if(defined $opt_P) {
4128 if($opt_P =~ /^\+(\d+)$/) {
4129 # E.g. -P +2
4130 my $j = $1;
4131 $processes =
4132 $self->ncpus() + $j;
4133 } elsif ($opt_P =~ /^-(\d+)$/) {
4134 # E.g. -P -2
4135 my $j = $1;
4136 $processes =
4137 $self->ncpus() - $j;
4138 } elsif ($opt_P =~ /^(\d+(\.\d+)?)\%$/) {
4139 # E.g. -P 10.5%
4140 my $j = $1;
4141 $processes =
4142 $self->ncpus() * $j / 100;
4143 } elsif ($opt_P =~ /^(\d+)$/) {
4144 $processes = $1;
4145 if($processes == 0) {
4146 # -P 0 = infinity (or at least close)
4147 $processes = $Global::infinity;
4148 }
4149 } elsif (-f $opt_P) {
4150 $Global::max_procs_file = $opt_P;
4151 $Global::max_procs_file_last_mod = (stat($Global::max_procs_file))[9];
4152 if(open(my $in_fh, "<", $Global::max_procs_file)) {
4153 my $opt_P_file = join("",<$in_fh>);
4154 close $in_fh;
4155 $processes = $self->user_requested_processes($opt_P_file);
4156 } else {
4157 ::error("Cannot open $opt_P.\n");
4158 ::wait_and_exit(255);
4159 }
4160 } else {
4161 ::error("Parsing of --jobs/-j/--max-procs/-P failed.\n");
4162 ::die_usage();
4163 }
4164 $processes = ::ceil($processes);
4165 }
4166 return $processes;
4167 }
4168
4169 sub ncpus {
4170 my $self = shift;
4171 if(not defined $self->{'ncpus'}) {
4172 my $sshcmd = $self->sshcommand();
4173 my $serverlogin = $self->serverlogin();
4174 if($serverlogin eq ":") {
4175 if($opt::use_cpus_instead_of_cores) {
4176 $self->{'ncpus'} = no_of_cpus();
4177 } else {
4178 $self->{'ncpus'} = no_of_cores();
4179 }
4180 } else {
4181 my $ncpu;
4182 my $sqe = ::shell_quote_scalar($Global::envvar);
4183 if($opt::use_cpus_instead_of_cores) {
4184 $ncpu = qx(echo|$sshcmd $serverlogin $sqe parallel --number-of-cpus);
4185 } else {
4186 ::debug("init",qq(echo|$sshcmd $serverlogin $sqe parallel --number-of-cores\n));
4187 $ncpu = qx(echo|$sshcmd $serverlogin $sqe parallel --number-of-cores);
4188 }
4189 chomp $ncpu;
4190 if($ncpu =~ /^\s*[0-9]+\s*$/s) {
4191 $self->{'ncpus'} = $ncpu;
4192 } else {
4193 ::warning("Could not figure out ",
4194 "number of cpus on $serverlogin ($ncpu). Using 1.\n");
4195 $self->{'ncpus'} = 1;
4196 }
4197 }
4198 }
4199 return $self->{'ncpus'};
4200 }
4201
4202 sub no_of_cpus {
4203 # Returns:
4204 # Number of physical CPUs
4205 local $/="\n"; # If delimiter is set, then $/ will be wrong
4206 my $no_of_cpus;
4207 if ($^O eq 'linux') {
4208 $no_of_cpus = no_of_cpus_gnu_linux() || no_of_cores_gnu_linux();
4209 } elsif ($^O eq 'freebsd') {
4210 $no_of_cpus = no_of_cpus_freebsd();
4211 } elsif ($^O eq 'netbsd') {
4212 $no_of_cpus = no_of_cpus_netbsd();
4213 } elsif ($^O eq 'openbsd') {
4214 $no_of_cpus = no_of_cpus_openbsd();
4215 } elsif ($^O eq 'gnu') {
4216 $no_of_cpus = no_of_cpus_hurd();
4217 } elsif ($^O eq 'darwin') {
4218 $no_of_cpus = no_of_cpus_darwin();
4219 } elsif ($^O eq 'solaris') {
4220 $no_of_cpus = no_of_cpus_solaris();
4221 } elsif ($^O eq 'aix') {
4222 $no_of_cpus = no_of_cpus_aix();
4223 } elsif ($^O eq 'hpux') {
4224 $no_of_cpus = no_of_cpus_hpux();
4225 } elsif ($^O eq 'nto') {
4226 $no_of_cpus = no_of_cpus_qnx();
4227 } elsif ($^O eq 'svr5') {
4228 $no_of_cpus = no_of_cpus_openserver();
4229 } elsif ($^O eq 'irix') {
4230 $no_of_cpus = no_of_cpus_irix();
4231 } elsif ($^O eq 'dec_osf') {
4232 $no_of_cpus = no_of_cpus_tru64();
4233 } else {
4234 $no_of_cpus = (no_of_cpus_gnu_linux()
4235 || no_of_cpus_freebsd()
4236 || no_of_cpus_netbsd()
4237 || no_of_cpus_openbsd()
4238 || no_of_cpus_hurd()
4239 || no_of_cpus_darwin()
4240 || no_of_cpus_solaris()
4241 || no_of_cpus_aix()
4242 || no_of_cpus_hpux()
4243 || no_of_cpus_qnx()
4244 || no_of_cpus_openserver()
4245 || no_of_cpus_irix()
4246 || no_of_cpus_tru64()
4247 # Number of cores is better than no guess for #CPUs
4248 || nproc()
4249 );
4250 }
4251 if($no_of_cpus) {
4252 chomp $no_of_cpus;
4253 return $no_of_cpus;
4254 } else {
4255 ::warning("Cannot figure out number of cpus. Using 1.\n");
4256 return 1;
4257 }
4258 }
4259
4260 sub no_of_cores {
4261 # Returns:
4262 # Number of CPU cores
4263 local $/="\n"; # If delimiter is set, then $/ will be wrong
4264 my $no_of_cores;
4265 if ($^O eq 'linux') {
4266 $no_of_cores = no_of_cores_gnu_linux();
4267 } elsif ($^O eq 'freebsd') {
4268 $no_of_cores = no_of_cores_freebsd();
4269 } elsif ($^O eq 'netbsd') {
4270 $no_of_cores = no_of_cores_netbsd();
4271 } elsif ($^O eq 'openbsd') {
4272 $no_of_cores = no_of_cores_openbsd();
4273 } elsif ($^O eq 'gnu') {
4274 $no_of_cores = no_of_cores_hurd();
4275 } elsif ($^O eq 'darwin') {
4276 $no_of_cores = no_of_cores_darwin();
4277 } elsif ($^O eq 'solaris') {
4278 $no_of_cores = no_of_cores_solaris();
4279 } elsif ($^O eq 'aix') {
4280 $no_of_cores = no_of_cores_aix();
4281 } elsif ($^O eq 'hpux') {
4282 $no_of_cores = no_of_cores_hpux();
4283 } elsif ($^O eq 'nto') {
4284 $no_of_cores = no_of_cores_qnx();
4285 } elsif ($^O eq 'svr5') {
4286 $no_of_cores = no_of_cores_openserver();
4287 } elsif ($^O eq 'irix') {
4288 $no_of_cores = no_of_cores_irix();
4289 } elsif ($^O eq 'dec_osf') {
4290 $no_of_cores = no_of_cores_tru64();
4291 } else {
4292 $no_of_cores = (no_of_cores_gnu_linux()
4293 || no_of_cores_freebsd()
4294 || no_of_cores_netbsd()
4295 || no_of_cores_openbsd()
4296 || no_of_cores_hurd()
4297 || no_of_cores_darwin()
4298 || no_of_cores_solaris()
4299 || no_of_cores_aix()
4300 || no_of_cores_hpux()
4301 || no_of_cores_qnx()
4302 || no_of_cores_openserver()
4303 || no_of_cores_irix()
4304 || no_of_cores_tru64()
4305 || nproc()
4306 );
4307 }
4308 if($no_of_cores) {
4309 chomp $no_of_cores;
4310 return $no_of_cores;
4311 } else {
4312 ::warning("Cannot figure out number of CPU cores. Using 1.\n");
4313 return 1;
4314 }
4315 }
4316
4317 sub nproc {
4318 # Returns:
4319 # Number of cores using `nproc`
4320 my $no_of_cores = `nproc 2>/dev/null`;
4321 return $no_of_cores;
4322 }
4323
4324 sub no_of_cpus_gnu_linux {
4325 # Returns:
4326 # Number of physical CPUs on GNU/Linux
4327 # undef if not GNU/Linux
4328 my $no_of_cpus;
4329 my $no_of_cores;
4330 if(-e "/proc/cpuinfo") {
4331 $no_of_cpus = 0;
4332 $no_of_cores = 0;
4333 my %seen;
4334 open(my $in_fh, "<", "/proc/cpuinfo") || return undef;
4335 while(<$in_fh>) {
4336 if(/^physical id.*[:](.*)/ and not $seen{$1}++) {
4337 $no_of_cpus++;
4338 }
4339 /^processor.*[:]/i and $no_of_cores++;
4340 }
4341 close $in_fh;
4342 }
4343 return ($no_of_cpus||$no_of_cores);
4344 }
4345
4346 sub no_of_cores_gnu_linux {
4347 # Returns:
4348 # Number of CPU cores on GNU/Linux
4349 # undef if not GNU/Linux
4350 my $no_of_cores;
4351 if(-e "/proc/cpuinfo") {
4352 $no_of_cores = 0;
4353 open(my $in_fh, "<", "/proc/cpuinfo") || return undef;
4354 while(<$in_fh>) {
4355 /^processor.*[:]/i and $no_of_cores++;
4356 }
4357 close $in_fh;
4358 }
4359 return $no_of_cores;
4360 }
4361
4362 sub no_of_cpus_freebsd {
4363 # Returns:
4364 # Number of physical CPUs on FreeBSD
4365 # undef if not FreeBSD
4366 my $no_of_cpus =
4367 (`sysctl -a dev.cpu 2>/dev/null | grep \%parent | awk '{ print \$2 }' | uniq | wc -l | awk '{ print \$1 }'`
4368 or
4369 `sysctl hw.ncpu 2>/dev/null | awk '{ print \$2 }'`);
4370 chomp $no_of_cpus;
4371 return $no_of_cpus;
4372 }
4373
4374 sub no_of_cores_freebsd {
4375 # Returns:
4376 # Number of CPU cores on FreeBSD
4377 # undef if not FreeBSD
4378 my $no_of_cores =
4379 (`sysctl hw.ncpu 2>/dev/null | awk '{ print \$2 }'`
4380 or
4381 `sysctl -a hw 2>/dev/null | grep [^a-z]logicalcpu[^a-z] | awk '{ print \$2 }'`);
4382 chomp $no_of_cores;
4383 return $no_of_cores;
4384 }
4385
4386 sub no_of_cpus_netbsd {
4387 # Returns:
4388 # Number of physical CPUs on NetBSD
4389 # undef if not NetBSD
4390 my $no_of_cpus = `sysctl -n hw.ncpu 2>/dev/null`;
4391 chomp $no_of_cpus;
4392 return $no_of_cpus;
4393 }
4394
4395 sub no_of_cores_netbsd {
4396 # Returns:
4397 # Number of CPU cores on NetBSD
4398 # undef if not NetBSD
4399 my $no_of_cores = `sysctl -n hw.ncpu 2>/dev/null`;
4400 chomp $no_of_cores;
4401 return $no_of_cores;
4402 }
4403
4404 sub no_of_cpus_openbsd {
4405 # Returns:
4406 # Number of physical CPUs on OpenBSD
4407 # undef if not OpenBSD
4408 my $no_of_cpus = `sysctl -n hw.ncpu 2>/dev/null`;
4409 chomp $no_of_cpus;
4410 return $no_of_cpus;
4411 }
4412
4413 sub no_of_cores_openbsd {
4414 # Returns:
4415 # Number of CPU cores on OpenBSD
4416 # undef if not OpenBSD
4417 my $no_of_cores = `sysctl -n hw.ncpu 2>/dev/null`;
4418 chomp $no_of_cores;
4419 return $no_of_cores;
4420 }
4421
4422 sub no_of_cpus_hurd {
4423 # Returns:
4424 # Number of physical CPUs on HURD
4425 # undef if not HURD
4426 my $no_of_cpus = `nproc`;
4427 chomp $no_of_cpus;
4428 return $no_of_cpus;
4429 }
4430
4431 sub no_of_cores_hurd {
4432 # Returns:
4433 # Number of physical CPUs on HURD
4434 # undef if not HURD
4435 my $no_of_cores = `nproc`;
4436 chomp $no_of_cores;
4437 return $no_of_cores;
4438 }
4439
4440 sub no_of_cpus_darwin {
4441 # Returns:
4442 # Number of physical CPUs on Mac Darwin
4443 # undef if not Mac Darwin
4444 my $no_of_cpus =
4445 (`sysctl -n hw.physicalcpu 2>/dev/null`
4446 or
4447 `sysctl -a hw 2>/dev/null | grep [^a-z]physicalcpu[^a-z] | awk '{ print \$2 }'`);
4448 return $no_of_cpus;
4449 }
4450
4451 sub no_of_cores_darwin {
4452 # Returns:
4453 # Number of CPU cores on Mac Darwin
4454 # undef if not Mac Darwin
4455 my $no_of_cores =
4456 (`sysctl -n hw.logicalcpu 2>/dev/null`
4457 or
4458 `sysctl -a hw 2>/dev/null | grep [^a-z]logicalcpu[^a-z] | awk '{ print \$2 }'`);
4459 return $no_of_cores;
4460 }
4461
4462 sub no_of_cpus_solaris {
4463 # Returns:
4464 # Number of physical CPUs on Solaris
4465 # undef if not Solaris
4466 if(-x "/usr/sbin/psrinfo") {
4467 my @psrinfo = `/usr/sbin/psrinfo`;
4468 if($#psrinfo >= 0) {
4469 return $#psrinfo +1;
4470 }
4471 }
4472 if(-x "/usr/sbin/prtconf") {
4473 my @prtconf = `/usr/sbin/prtconf | grep cpu..instance`;
4474 if($#prtconf >= 0) {
4475 return $#prtconf +1;
4476 }
4477 }
4478 return undef;
4479 }
4480
4481 sub no_of_cores_solaris {
4482 # Returns:
4483 # Number of CPU cores on Solaris
4484 # undef if not Solaris
4485 if(-x "/usr/sbin/psrinfo") {
4486 my @psrinfo = `/usr/sbin/psrinfo`;
4487 if($#psrinfo >= 0) {
4488 return $#psrinfo +1;
4489 }
4490 }
4491 if(-x "/usr/sbin/prtconf") {
4492 my @prtconf = `/usr/sbin/prtconf | grep cpu..instance`;
4493 if($#prtconf >= 0) {
4494 return $#prtconf +1;
4495 }
4496 }
4497 return undef;
4498 }
4499
4500 sub no_of_cpus_aix {
4501 # Returns:
4502 # Number of physical CPUs on AIX
4503 # undef if not AIX
4504 my $no_of_cpus = 0;
4505 if(-x "/usr/sbin/lscfg") {
4506 open(my $in_fh, "-|", "/usr/sbin/lscfg -vs |grep proc | wc -l|tr -d ' '")
4507 || return undef;
4508 $no_of_cpus = <$in_fh>;
4509 chomp ($no_of_cpus);
4510 close $in_fh;
4511 }
4512 return $no_of_cpus;
4513 }
4514
4515 sub no_of_cores_aix {
4516 # Returns:
4517 # Number of CPU cores on AIX
4518 # undef if not AIX
4519 my $no_of_cores;
4520 if(-x "/usr/bin/vmstat") {
4521 open(my $in_fh, "-|", "/usr/bin/vmstat 1 1") || return undef;
4522 while(<$in_fh>) {
4523 /lcpu=([0-9]*) / and $no_of_cores = $1;
4524 }
4525 close $in_fh;
4526 }
4527 return $no_of_cores;
4528 }
4529
4530 sub no_of_cpus_hpux {
4531 # Returns:
4532 # Number of physical CPUs on HP-UX
4533 # undef if not HP-UX
4534 my $no_of_cpus =
4535 (`/usr/bin/mpsched -s 2>&1 | grep 'Locality Domain Count' | awk '{ print \$4 }'`);
4536 return $no_of_cpus;
4537 }
4538
4539 sub no_of_cores_hpux {
4540 # Returns:
4541 # Number of CPU cores on HP-UX
4542 # undef if not HP-UX
4543 my $no_of_cores =
4544 (`/usr/bin/mpsched -s 2>&1 | grep 'Processor Count' | awk '{ print \$3 }'`);
4545 return $no_of_cores;
4546 }
4547
4548 sub no_of_cpus_qnx {
4549 # Returns:
4550 # Number of physical CPUs on QNX
4551 # undef if not QNX
4552 # BUG: It is now known how to calculate this.
4553 my $no_of_cpus = 0;
4554 return $no_of_cpus;
4555 }
4556
4557 sub no_of_cores_qnx {
4558 # Returns:
4559 # Number of CPU cores on QNX
4560 # undef if not QNX
4561 # BUG: It is now known how to calculate this.
4562 my $no_of_cores = 0;
4563 return $no_of_cores;
4564 }
4565
4566 sub no_of_cpus_openserver {
4567 # Returns:
4568 # Number of physical CPUs on SCO OpenServer
4569 # undef if not SCO OpenServer
4570 my $no_of_cpus = 0;
4571 if(-x "/usr/sbin/psrinfo") {
4572 my @psrinfo = `/usr/sbin/psrinfo`;
4573 if($#psrinfo >= 0) {
4574 return $#psrinfo +1;
4575 }
4576 }
4577 return $no_of_cpus;
4578 }
4579
4580 sub no_of_cores_openserver {
4581 # Returns:
4582 # Number of CPU cores on SCO OpenServer
4583 # undef if not SCO OpenServer
4584 my $no_of_cores = 0;
4585 if(-x "/usr/sbin/psrinfo") {
4586 my @psrinfo = `/usr/sbin/psrinfo`;
4587 if($#psrinfo >= 0) {
4588 return $#psrinfo +1;
4589 }
4590 }
4591 return $no_of_cores;
4592 }
4593
4594 sub no_of_cpus_irix {
4595 # Returns:
4596 # Number of physical CPUs on IRIX
4597 # undef if not IRIX
4598 my $no_of_cpus = `hinv | grep HZ | grep Processor | awk '{print \$1}'`;
4599 return $no_of_cpus;
4600 }
4601
4602 sub no_of_cores_irix {
4603 # Returns:
4604 # Number of CPU cores on IRIX
4605 # undef if not IRIX
4606 my $no_of_cores = `hinv | grep HZ | grep Processor | awk '{print \$1}'`;
4607 return $no_of_cores;
4608 }
4609
4610 sub no_of_cpus_tru64 {
4611 # Returns:
4612 # Number of physical CPUs on Tru64
4613 # undef if not Tru64
4614 my $no_of_cpus = `sizer -pr`;
4615 return $no_of_cpus;
4616 }
4617
4618 sub no_of_cores_tru64 {
4619 # Returns:
4620 # Number of CPU cores on Tru64
4621 # undef if not Tru64
4622 my $no_of_cores = `sizer -pr`;
4623 return $no_of_cores;
4624 }
4625
4626 sub sshcommand {
4627 my $self = shift;
4628 if (not defined $self->{'sshcommand'}) {
4629 $self->sshcommand_of_sshlogin();
4630 }
4631 return $self->{'sshcommand'};
4632 }
4633
4634 sub serverlogin {
4635 my $self = shift;
4636 if (not defined $self->{'serverlogin'}) {
4637 $self->sshcommand_of_sshlogin();
4638 }
4639 return $self->{'serverlogin'};
4640 }
4641
4642 sub sshcommand_of_sshlogin {
4643 # 'server' -> ('ssh -S /tmp/parallel-ssh-RANDOM/host-','server')
4644 # 'user@server' -> ('ssh','user@server')
4645 # 'myssh user@server' -> ('myssh','user@server')
4646 # 'myssh -l user server' -> ('myssh -l user','server')
4647 # '/usr/bin/myssh -l user server' -> ('/usr/bin/myssh -l user','server')
4648 # Returns:
4649 # sshcommand - defaults to 'ssh'
4650 # login@host
4651 my $self = shift;
4652 my ($sshcmd, $serverlogin);
4653 if($self->{'string'} =~ /(.+) (\S+)$/) {
4654 # Own ssh command
4655 $sshcmd = $1; $serverlogin = $2;
4656 } else {
4657 # Normal ssh
4658 if($opt::controlmaster) {
4659 # Use control_path to make ssh faster
4660 my $control_path = $self->control_path_dir()."/ssh-%r@%h:%p";
4661 $sshcmd = "ssh -S ".$control_path;
4662 $serverlogin = $self->{'string'};
4663 if(not $self->{'control_path'}{$control_path}++) {
4664 # Master is not running for this control_path
4665 # Start it
4666 my $pid = fork();
4667 if($pid) {
4668 $Global::sshmaster{$pid} ||= 1;
4669 } else {
4670 $SIG{'TERM'} = undef;
4671 # Ignore the 'foo' being printed
4672 open(STDOUT,">","/dev/null");
4673 # OpenSSH_3.6.1p2 gives 'tcgetattr: Invalid argument' with -tt
4674 # STDERR >/dev/null to ignore "process_mux_new_session: tcgetattr: Invalid argument"
4675 open(STDERR,">","/dev/null");
4676 open(STDIN,"<","/dev/null");
4677 # Run a sleep that outputs data, so it will discover if the ssh connection closes.
4678 my $sleep = ::shell_quote_scalar('$|=1;while(1){sleep 1;print "foo\n"}');
4679 my @master = ("ssh", "-tt", "-MTS", $control_path, $serverlogin, "perl", "-e", $sleep);
4680 exec(@master);
4681 }
4682 }
4683 } else {
4684 $sshcmd = "ssh"; $serverlogin = $self->{'string'};
4685 }
4686 }
4687 $self->{'sshcommand'} = $sshcmd;
4688 $self->{'serverlogin'} = $serverlogin;
4689 }
4690
4691 sub control_path_dir {
4692 # Returns:
4693 # path to directory
4694 my $self = shift;
4695 if(not defined $self->{'control_path_dir'}) {
4696 -e $ENV{'HOME'}."/.parallel" or mkdir $ENV{'HOME'}."/.parallel";
4697 -e $ENV{'HOME'}."/.parallel/tmp" or mkdir $ENV{'HOME'}."/.parallel/tmp";
4698 $self->{'control_path_dir'} =
4699 File::Temp::tempdir($ENV{'HOME'}
4700 . "/.parallel/tmp/control_path_dir-XXXX",
4701 CLEANUP => 1);
4702 }
4703 return $self->{'control_path_dir'};
4704 }
4705
4706 sub rsync_transfer_cmd {
4707 # Command to run to transfer a file
4708 # Input:
4709 # $file = filename of file to transfer
4710 # $workdir = destination dir
4711 # Returns:
4712 # $cmd = rsync command to run to transfer $file ("" if unreadable)
4713 my $self = shift;
4714 my $file = shift;
4715 my $workdir = shift;
4716 if(not -r $file) {
4717 ::warning($file, " is not readable and will not be transferred.\n");
4718 return "true";
4719 }
4720 my $rsync_destdir;
4721 if($file =~ m:^/:) {
4722 # rsync /foo/bar /
4723 $rsync_destdir = "/";
4724 } else {
4725 $rsync_destdir = ::shell_quote_file($workdir);
4726 }
4727 $file = ::shell_quote_file($file);
4728 my $sshcmd = $self->sshcommand();
4729 my $rsync_opt = "-rlDzR -e" . ::shell_quote_scalar($sshcmd);
4730 my $serverlogin = $self->serverlogin();
4731 # Make dir if it does not exist
4732 return "( $sshcmd $serverlogin mkdir -p $rsync_destdir;" .
4733 rsync()." $rsync_opt $file $serverlogin:$rsync_destdir )";
4734 }
4735
4736 sub cleanup_cmd {
4737 # Command to run to remove the remote file
4738 # Input:
4739 # $file = filename to remove
4740 # $workdir = destination dir
4741 # Returns:
4742 # $cmd = ssh command to run to remove $file and empty parent dirs
4743 my $self = shift;
4744 my $file = shift;
4745 my $workdir = shift;
4746 my $f = $file;
4747 if($f =~ m:/\./:) {
4748 # foo/bar/./baz/quux => workdir/baz/quux
4749 # /foo/bar/./baz/quux => workdir/baz/quux
4750 $f =~ s:.*/\./:$workdir/:;
4751 } elsif($f =~ m:^[^/]:) {
4752 # foo/bar => workdir/foo/bar
4753 $f = $workdir."/".$f;
4754 }
4755 my @subdirs = split m:/:, ::dirname($f);
4756 my @rmdir;
4757 my $dir = "";
4758 for(@subdirs) {
4759 $dir .= $_."/";
4760 unshift @rmdir, ::shell_quote_file($dir);
4761 }
4762 my $rmdir = @rmdir ? "rmdir @rmdir 2>/dev/null;" : "";
4763 if(defined $opt::workdir and $opt::workdir eq "...") {
4764 $rmdir .= "rm -rf " . ::shell_quote_file($workdir).';';
4765 }
4766
4767 $f = ::shell_quote_file($f);
4768 my $sshcmd = $self->sshcommand();
4769 my $serverlogin = $self->serverlogin();
4770 return "$sshcmd $serverlogin ".::shell_quote_scalar("(rm -f $f; $rmdir)");
4771 }
4772
4773 {
4774 my $rsync;
4775
4776 sub rsync {
4777 # rsync 3.1.x uses protocol 31 which is unsupported by 2.5.7.
4778 # If the version >= 3.1.0: downgrade to protocol 30
4779 if(not $rsync) {
4780 my @out = `rsync --version`;
4781 for (@out) {
4782 if(/version (\d+.\d+)(.\d+)?/) {
4783 if($1 >= 3.1) {
4784 # Version 3.1.0 or later: Downgrade to protocol 30
4785 $rsync = "rsync --protocol 30";
4786 } else {
4787 $rsync = "rsync";
4788 }
4789 }
4790 }
4791 $rsync or ::die_bug("Cannot figure out version of rsync: @out");
4792 }
4793 return $rsync;
4794 }
4795 }
4796
4797
4798 package JobQueue;
4799
4800 sub new {
4801 my $class = shift;
4802 my $commandref = shift;
4803 my $read_from = shift;
4804 my $context_replace = shift;
4805 my $max_number_of_args = shift;
4806 my $return_files = shift;
4807 my $commandlinequeue = CommandLineQueue->new
4808 ($commandref, $read_from, $context_replace, $max_number_of_args,
4809 $return_files);
4810 my @unget = ();
4811 return bless {
4812 'unget' => \@unget,
4813 'commandlinequeue' => $commandlinequeue,
4814 'total_jobs' => undef,
4815 }, ref($class) || $class;
4816 }
4817
4818 sub get {
4819 my $self = shift;
4820
4821 if(@{$self->{'unget'}}) {
4822 my $job = shift @{$self->{'unget'}};
4823 return ($job);
4824 } else {
4825 my $commandline = $self->{'commandlinequeue'}->get();
4826 if(defined $commandline) {
4827 my $job = Job->new($commandline);
4828 return $job;
4829 } else {
4830 return undef;
4831 }
4832 }
4833 }
4834
4835 sub unget {
4836 my $self = shift;
4837 unshift @{$self->{'unget'}}, @_;
4838 }
4839
4840 sub empty {
4841 my $self = shift;
4842 my $empty = (not @{$self->{'unget'}})
4843 && $self->{'commandlinequeue'}->empty();
4844 ::debug("run", "JobQueue->empty $empty ");
4845 return $empty;
4846 }
4847
4848 sub total_jobs {
4849 my $self = shift;
4850 if(not defined $self->{'total_jobs'}) {
4851 my $job;
4852 my @queue;
4853 my $start = time;
4854 while($job = $self->get()) {
4855 if(time - $start > 10) {
4856 ::warning("Reading all arguments takes longer than 10 seconds.\n");
4857 $opt::eta && ::warning("Consider removing --eta.\n");
4858 $opt::bar && ::warning("Consider removing --bar.\n");
4859 last;
4860 }
4861 push @queue, $job;
4862 }
4863 while($job = $self->get()) {
4864 push @queue, $job;
4865 }
4866
4867 $self->unget(@queue);
4868 $self->{'total_jobs'} = $#queue+1;
4869 }
4870 return $self->{'total_jobs'};
4871 }
4872
4873 sub next_seq {
4874 my $self = shift;
4875
4876 return $self->{'commandlinequeue'}->seq();
4877 }
4878
4879 sub quote_args {
4880 my $self = shift;
4881 return $self->{'commandlinequeue'}->quote_args();
4882 }
4883
4884
4885 package Job;
4886
4887 sub new {
4888 my $class = shift;
4889 my $commandlineref = shift;
4890 return bless {
4891 'commandline' => $commandlineref, # CommandLine object
4892 'workdir' => undef, # --workdir
4893 'stdin' => undef, # filehandle for stdin (used for --pipe)
4894 # filename for writing stdout to (used for --files)
4895 'remaining' => "", # remaining data not sent to stdin (used for --pipe)
4896 'datawritten' => 0, # amount of data sent via stdin (used for --pipe)
4897 'transfersize' => 0, # size of files using --transfer
4898 'returnsize' => 0, # size of files using --return
4899 'pid' => undef,
4900 # hash of { SSHLogins => number of times the command failed there }
4901 'failed' => undef,
4902 'sshlogin' => undef,
4903 # The commandline wrapped with rsync and ssh
4904 'sshlogin_wrap' => undef,
4905 'exitstatus' => undef,
4906 'exitsignal' => undef,
4907 # Timestamp for timeout if any
4908 'timeout' => undef,
4909 'virgin' => 1,
4910 }, ref($class) || $class;
4911 }
4912
4913 sub replaced {
4914 my $self = shift;
4915 $self->{'commandline'} or ::die_bug("commandline empty");
4916 return $self->{'commandline'}->replaced();
4917 }
4918
4919 sub seq {
4920 my $self = shift;
4921 return $self->{'commandline'}->seq();
4922 }
4923
4924 sub slot {
4925 my $self = shift;
4926 return $self->{'commandline'}->slot();
4927 }
4928
4929 {
4930 my($cattail);
4931
4932 sub cattail {
4933 # Returns:
4934 # $cattail = perl program for: cattail "decompress program" writerpid [file_to_decompress or stdin] [file_to_unlink]
4935 if(not $cattail) {
4936 $cattail = q{
4937 # cat followed by tail.
4938 # If $writerpid dead: finish after this round
4939 use Fcntl;
4940
4941 $|=1;
4942
4943 my ($cmd, $writerpid, $read_file, $unlink_file) = @ARGV;
4944 if($read_file) {
4945 open(IN,"<",$read_file) || die("cattail: Cannot open $read_file");
4946 } else {
4947 *IN = *STDIN;
4948 }
4949
4950 my $flags;
4951 fcntl(IN, F_GETFL, $flags) || die $!; # Get the current flags on the filehandle
4952 $flags |= O_NONBLOCK; # Add non-blocking to the flags
4953 fcntl(IN, F_SETFL, $flags) || die $!; # Set the flags on the filehandle
4954 open(OUT,"|-",$cmd) || die("cattail: Cannot run $cmd");
4955
4956 while(1) {
4957 # clear EOF
4958 seek(IN,0,1);
4959 my $writer_running = kill 0, $writerpid;
4960 $read = sysread(IN,$buf,32768);
4961 if($read) {
4962 # We can unlink the file now: The writer has written something
4963 -e $unlink_file and unlink $unlink_file;
4964 # Blocking print
4965 while($buf) {
4966 my $bytes_written = syswrite(OUT,$buf);
4967 # syswrite may be interrupted by SIGHUP
4968 substr($buf,0,$bytes_written) = "";
4969 }
4970 # Something printed: Wait less next time
4971 $sleep /= 2;
4972 } else {
4973 if(eof(IN) and not $writer_running) {
4974 # Writer dead: There will never be more to read => exit
4975 exit;
4976 }
4977 # TODO This could probably be done more efficiently using select(2)
4978 # Nothing read: Wait longer before next read
4979 # Up to 30 milliseconds
4980 $sleep = ($sleep < 30) ? ($sleep * 1.001 + 0.01) : ($sleep);
4981 usleep($sleep);
4982 }
4983 }
4984
4985 sub usleep {
4986 # Sleep this many milliseconds.
4987 my $secs = shift;
4988 select(undef, undef, undef, $secs/1000);
4989 }
4990 };
4991 $cattail =~ s/#.*//mg;
4992 $cattail =~ s/\s+/ /g;
4993 }
4994 return $cattail;
4995 }
4996 }
4997
4998 sub openoutputfiles {
4999 # Open files for STDOUT and STDERR
5000 # Set file handles in $self->fh
5001 my $self = shift;
5002 my ($outfhw, $errfhw, $outname, $errname);
5003 if($opt::results) {
5004 my $args_as_dirname = $self->{'commandline'}->args_as_dirname();
5005 # Output in: prefix/name1/val1/name2/val2/stdout
5006 my $dir = $opt::results."/".$args_as_dirname;
5007 if(eval{ File::Path::mkpath($dir); }) {
5008 # OK
5009 } else {
5010 # mkpath failed: Argument probably too long.
5011 # Set $Global::max_file_length, which will keep the individual
5012 # dir names shorter than the max length
5013 max_file_name_length($opt::results);
5014 $args_as_dirname = $self->{'commandline'}->args_as_dirname();
5015 # prefix/name1/val1/name2/val2/
5016 $dir = $opt::results."/".$args_as_dirname;
5017 File::Path::mkpath($dir);
5018 }
5019 # prefix/name1/val1/name2/val2/stdout
5020 $outname = "$dir/stdout";
5021 if(not open($outfhw, "+>", $outname)) {
5022 ::error("Cannot write to `$outname'.\n");
5023 ::wait_and_exit(255);
5024 }
5025 # prefix/name1/val1/name2/val2/stderr
5026 $errname = "$dir/stderr";
5027 if(not open($errfhw, "+>", $errname)) {
5028 ::error("Cannot write to `$errname'.\n");
5029 ::wait_and_exit(255);
5030 }
5031 $self->set_fh(1,"unlink","");
5032 $self->set_fh(2,"unlink","");
5033 } elsif(not $opt::ungroup) {
5034 # To group we create temporary files for STDOUT and STDERR
5035 # To avoid the cleanup unlink the files immediately (but keep them open)
5036 if(@Global::tee_jobs) {
5037 # files must be removed when the tee is done
5038 } elsif($opt::files) {
5039 ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par");
5040 ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par");
5041 # --files => only remove stderr
5042 $self->set_fh(1,"unlink","");
5043 $self->set_fh(2,"unlink",$errname);
5044 } else {
5045 ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par");
5046 ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par");
5047 $self->set_fh(1,"unlink",$outname);
5048 $self->set_fh(2,"unlink",$errname);
5049 }
5050 } else {
5051 # --ungroup
5052 open($outfhw,">&",$Global::fd{1}) || die;
5053 open($errfhw,">&",$Global::fd{2}) || die;
5054 # File name must be empty as it will otherwise be printed
5055 $outname = "";
5056 $errname = "";
5057 $self->set_fh(1,"unlink",$outname);
5058 $self->set_fh(2,"unlink",$errname);
5059 }
5060 # Set writing FD
5061 $self->set_fh(1,'w',$outfhw);
5062 $self->set_fh(2,'w',$errfhw);
5063 $self->set_fh(1,'name',$outname);
5064 $self->set_fh(2,'name',$errname);
5065 if($opt::compress) {
5066 # Send stdout to stdin for $opt::compress_program(1)
5067 # Send stderr to stdin for $opt::compress_program(2)
5068 # cattail get pid: $pid = $self->fh($fdno,'rpid');
5069 my $cattail = cattail();
5070 for my $fdno (1,2) {
5071 my $wpid = open(my $fdw,"|-","$opt::compress_program >>".
5072 $self->fh($fdno,'name')) || die $?;
5073 $self->set_fh($fdno,'w',$fdw);
5074 $self->set_fh($fdno,'wpid',$wpid);
5075 my $rpid = open(my $fdr, "-|", "perl", "-e", $cattail,
5076 $opt::decompress_program, $wpid,
5077 $self->fh($fdno,'name'),$self->fh($fdno,'unlink')) || die $?;
5078 $self->set_fh($fdno,'r',$fdr);
5079 $self->set_fh($fdno,'rpid',$rpid);
5080 }
5081 } elsif(not $opt::ungroup) {
5082 # Set reading FD if using --group (--ungroup does not need)
5083 for my $fdno (1,2) {
5084 # Re-open the file for reading
5085 # so fdw can be closed seperately
5086 # and fdr can be seeked seperately (for --line-buffer)
5087 open(my $fdr,"<", $self->fh($fdno,'name')) ||
5088 ::die_bug("fdr: Cannot open ".$self->fh($fdno,'name'));
5089 $self->set_fh($fdno,'r',$fdr);
5090 # Unlink if required
5091 $Global::debug or unlink $self->fh($fdno,"unlink");
5092 }
5093 }
5094 if($opt::linebuffer) {
5095 # Set non-blocking when using --linebuffer
5096 $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;";
5097 for my $fdno (1,2) {
5098 my $fdr = $self->fh($fdno,'r');
5099 my $flags;
5100 fcntl($fdr, &F_GETFL, $flags) || die $!; # Get the current flags on the filehandle
5101 $flags |= &O_NONBLOCK; # Add non-blocking to the flags
5102 fcntl($fdr, &F_SETFL, $flags) || die $!; # Set the flags on the filehandle
5103 }
5104 }
5105 }
5106
5107 sub max_file_name_length {
5108 # Figure out the max length of a subdir
5109 # TODO and the max total length
5110 # Ext4 = 255,130816
5111 my $testdir = shift;
5112
5113 my $upper = 8_000_000;
5114 my $len = 8;
5115 my $dir="x"x$len;
5116 do {
5117 rmdir($testdir."/".$dir);
5118 $len *= 16;
5119 $dir="x"x$len;
5120 } while (mkdir $testdir."/".$dir);
5121 # Then search for the actual max length between $len/16 and $len
5122 my $min = $len/16;
5123 my $max = $len;
5124 while($max-$min > 5) {
5125 # If we are within 5 chars of the exact value:
5126 # it is not worth the extra time to find the exact value
5127 my $test = int(($min+$max)/2);
5128 $dir="x"x$test;
5129 if(mkdir $testdir."/".$dir) {
5130 rmdir($testdir."/".$dir);
5131 $min = $test;
5132 } else {
5133 $max = $test;
5134 }
5135 }
5136 $Global::max_file_length = $min;
5137 return $min;
5138 }
5139
5140 sub set_fh {
5141 # Set file handle
5142 my ($self, $fd_no, $key, $fh) = @_;
5143 $self->{'fd'}{$fd_no,$key} = $fh;
5144 }
5145
5146 sub fh {
5147 # Get file handle
5148 my ($self, $fd_no, $key) = @_;
5149 return $self->{'fd'}{$fd_no,$key};
5150 }
5151
5152 sub write {
5153 my $self = shift;
5154 my $remaining_ref = shift;
5155 my $stdin_fh = $self->fh(0,"w");
5156 syswrite($stdin_fh,$$remaining_ref);
5157 }
5158
5159 sub set_stdin_buffer {
5160 # Copy stdin buffer from $block_ref up to $endpos
5161 # Prepend with $header_ref
5162 # Remove $recstart and $recend if needed
5163 # Input:
5164 # $header_ref = ref to $header to prepend
5165 # $block_ref = ref to $block to pass on
5166 # $endpos = length of $block to pass on
5167 # $recstart = --recstart regexp
5168 # $recend = --recend regexp
5169 # Returns:
5170 # N/A
5171 my $self = shift;
5172 my ($header_ref,$block_ref,$endpos,$recstart,$recend) = @_;
5173 $self->{'stdin_buffer'} = ($self->virgin() ? $$header_ref : "").substr($$block_ref,0,$endpos);
5174 if($opt::remove_rec_sep) {
5175 remove_rec_sep(\$self->{'stdin_buffer'},$recstart,$recend);
5176 }
5177 $self->{'stdin_buffer_length'} = length $self->{'stdin_buffer'};
5178 $self->{'stdin_buffer_pos'} = 0;
5179 }
5180
5181 sub stdin_buffer_length {
5182 my $self = shift;
5183 return $self->{'stdin_buffer_length'};
5184 }
5185
5186 sub remove_rec_sep {
5187 my ($block_ref,$recstart,$recend) = @_;
5188 # Remove record separator
5189 $$block_ref =~ s/$recend$recstart//gos;
5190 $$block_ref =~ s/^$recstart//os;
5191 $$block_ref =~ s/$recend$//os;
5192 }
5193
5194 sub non_block_write {
5195 my $self = shift;
5196 my $something_written = 0;
5197 use POSIX qw(:errno_h);
5198 # use Fcntl;
5199 # my $flags = '';
5200 for my $buf (substr($self->{'stdin_buffer'},$self->{'stdin_buffer_pos'})) {
5201 my $in = $self->fh(0,"w");
5202 # fcntl($in, F_GETFL, $flags)
5203 # or die "Couldn't get flags for HANDLE : $!\n";
5204 # $flags |= O_NONBLOCK;
5205 # fcntl($in, F_SETFL, $flags)
5206 # or die "Couldn't set flags for HANDLE: $!\n";
5207 my $rv = syswrite($in, $buf);
5208 if (!defined($rv) && $! == EAGAIN) {
5209 # would block
5210 $something_written = 0;
5211 } elsif ($self->{'stdin_buffer_pos'}+$rv != $self->{'stdin_buffer_length'}) {
5212 # incomplete write
5213 # Remove the written part
5214 $self->{'stdin_buffer_pos'} += $rv;
5215 $something_written = $rv;
5216 } else {
5217 # successfully wrote everything
5218 my $a="";
5219 $self->set_stdin_buffer(\$a,\$a,"","");
5220 $something_written = $rv;
5221 }
5222 }
5223
5224 ::debug("pipe", "Non-block: ", $something_written);
5225 return $something_written;
5226 }
5227
5228
5229 sub virgin {
5230 my $self = shift;
5231 return $self->{'virgin'};
5232 }
5233
5234 sub set_virgin {
5235 my $self = shift;
5236 $self->{'virgin'} = shift;
5237 }
5238
5239 sub pid {
5240 my $self = shift;
5241 return $self->{'pid'};
5242 }
5243
5244 sub set_pid {
5245 my $self = shift;
5246 $self->{'pid'} = shift;
5247 }
5248
5249 sub starttime {
5250 # Returns:
5251 # UNIX-timestamp this job started
5252 my $self = shift;
5253 return sprintf("%.3f",$self->{'starttime'});
5254 }
5255
5256 sub set_starttime {
5257 my $self = shift;
5258 my $starttime = shift || ::now();
5259 $self->{'starttime'} = $starttime;
5260 }
5261
5262 sub runtime {
5263 # Returns:
5264 # Run time in seconds
5265 my $self = shift;
5266 return sprintf("%.3f",int(($self->endtime() - $self->starttime())*1000)/1000);
5267 }
5268
5269 sub endtime {
5270 # Returns:
5271 # UNIX-timestamp this job ended
5272 # 0 if not ended yet
5273 my $self = shift;
5274 return ($self->{'endtime'} || 0);
5275 }
5276
5277 sub set_endtime {
5278 my $self = shift;
5279 my $endtime = shift;
5280 $self->{'endtime'} = $endtime;
5281 }
5282
5283 sub timedout {
5284 # Is the job timedout?
5285 # Input:
5286 # $delta_time = time that the job may run
5287 # Returns:
5288 # True or false
5289 my $self = shift;
5290 my $delta_time = shift;
5291 return time > $self->{'starttime'} + $delta_time;
5292 }
5293
5294 sub kill {
5295 # Kill the job.
5296 # Send the signals to (grand)*children and pid.
5297 # If no signals: TERM TERM KILL
5298 # Wait 200 ms after each TERM.
5299 # Input:
5300 # @signals = signals to send
5301 my $self = shift;
5302 my @signals = @_;
5303 my @family_pids = $self->family_pids();
5304 # Record this jobs as failed
5305 $self->set_exitstatus(-1);
5306 # Send two TERMs to give time to clean up
5307 ::debug("run", "Kill seq ", $self->seq(), "\n");
5308 my @send_signals = @signals || ("TERM", "TERM", "KILL");
5309 for my $signal (@send_signals) {
5310 my $alive = 0;
5311 for my $pid (@family_pids) {
5312 if(kill 0, $pid) {
5313 # The job still running
5314 kill $signal, $pid;
5315 $alive = 1;
5316 }
5317 }
5318 # If a signal was given as input, do not do the sleep below
5319 @signals and next;
5320
5321 if($signal eq "TERM" and $alive) {
5322 # Wait up to 200 ms between TERMs - but only if any pids are alive
5323 my $sleep = 1;
5324 for (my $sleepsum = 0; kill 0, $family_pids[0] and $sleepsum < 200;
5325 $sleepsum += $sleep) {
5326 $sleep = ::reap_usleep($sleep);
5327 }
5328 }
5329 }
5330 }
5331
5332 sub family_pids {
5333 # Find the pids with this->pid as (grand)*parent
5334 # Returns:
5335 # @pids = pids of (grand)*children
5336 my $self = shift;
5337 my $pid = $self->pid();
5338 my @pids;
5339
5340 my ($children_of_ref, $parent_of_ref, $name_of_ref) = ::pid_table();
5341
5342 my @more = ($pid);
5343 # While more (grand)*children
5344 while(@more) {
5345 my @m;
5346 push @pids, @more;
5347 for my $parent (@more) {
5348 if($children_of_ref->{$parent}) {
5349 # add the children of this parent
5350 push @m, @{$children_of_ref->{$parent}};
5351 }
5352 }
5353 @more = @m;
5354 }
5355 return (@pids);
5356 }
5357
5358 sub failed {
5359 # return number of times failed for this $sshlogin
5360 # Input:
5361 # $sshlogin
5362 # Returns:
5363 # Number of times failed for $sshlogin
5364 my $self = shift;
5365 my $sshlogin = shift;
5366 return $self->{'failed'}{$sshlogin};
5367 }
5368
5369 sub failed_here {
5370 # return number of times failed for the current $sshlogin
5371 # Returns:
5372 # Number of times failed for this sshlogin
5373 my $self = shift;
5374 return $self->{'failed'}{$self->sshlogin()};
5375 }
5376
5377 sub add_failed {
5378 # increase the number of times failed for this $sshlogin
5379 my $self = shift;
5380 my $sshlogin = shift;
5381 $self->{'failed'}{$sshlogin}++;
5382 }
5383
5384 sub add_failed_here {
5385 # increase the number of times failed for the current $sshlogin
5386 my $self = shift;
5387 $self->{'failed'}{$self->sshlogin()}++;
5388 }
5389
5390 sub reset_failed {
5391 # increase the number of times failed for this $sshlogin
5392 my $self = shift;
5393 my $sshlogin = shift;
5394 delete $self->{'failed'}{$sshlogin};
5395 }
5396
5397 sub reset_failed_here {
5398 # increase the number of times failed for this $sshlogin
5399 my $self = shift;
5400 delete $self->{'failed'}{$self->sshlogin()};
5401 }
5402
5403 sub min_failed {
5404 # Returns:
5405 # the number of sshlogins this command has failed on
5406 # the minimal number of times this command has failed
5407 my $self = shift;
5408 my $min_failures =
5409 ::min(map { $self->{'failed'}{$_} } keys %{$self->{'failed'}});
5410 my $number_of_sshlogins_failed_on = scalar keys %{$self->{'failed'}};
5411 return ($number_of_sshlogins_failed_on,$min_failures);
5412 }
5413
5414 sub total_failed {
5415 # Returns:
5416 # $total_failures = the number of times this command has failed
5417 my $self = shift;
5418 my $total_failures = 0;
5419 for (values %{$self->{'failed'}}) {
5420 $total_failures += $_;
5421 }
5422 return $total_failures;
5423 }
5424
5425 sub wrapped {
5426 # Wrap command with:
5427 # * --shellquote
5428 # * --nice
5429 # * --cat
5430 # * --fifo
5431 # * --sshlogin
5432 # * --pipepart (@Global::cat_partials)
5433 # * --pipe
5434 # * --tmux
5435 # The ordering of the wrapping is important:
5436 # * --nice/--cat/--fifo should be done on the remote machine
5437 # * --pipepart/--pipe should be done on the local machine inside --tmux
5438 # Uses:
5439 # $Global::envvar
5440 # $opt::shellquote
5441 # $opt::nice
5442 # $Global::shell
5443 # $opt::cat
5444 # $opt::fifo
5445 # @Global::cat_partials
5446 # $opt::pipe
5447 # $opt::tmux
5448 # Returns:
5449 # $self->{'wrapped'} = the command wrapped with the above
5450 my $self = shift;
5451 if(not defined $self->{'wrapped'}) {
5452 my $command = $Global::envvar.$self->replaced();
5453 if($opt::shellquote) {
5454 # Prepend echo
5455 # and quote twice
5456 $command = "echo " .
5457 ::shell_quote_scalar(::shell_quote_scalar($command));
5458 }
5459 if($opt::nice) {
5460 # Prepend \nice -n19 $SHELL -c
5461 # and quote.
5462 # The '\' before nice is needed to avoid tcsh's built-in
5463 $command = '\nice'. " -n". $opt::nice. " ".
5464 $Global::shell. " -c ".
5465 ::shell_quote_scalar($command);
5466 }
5467 if($opt::cat) {
5468 # Prepend 'cat > {};'
5469 # Append '_EXIT=$?;(rm {};exit $_EXIT)'
5470 $command =
5471 $self->{'commandline'}->replace_placeholders(["cat > \257<\257>; "], 0, 0).
5472 $command.
5473 $self->{'commandline'}->replace_placeholders(
5474 ["; _EXIT=\$?; rm \257<\257>; exit \$_EXIT"], 0, 0);
5475 } elsif($opt::fifo) {
5476 # Prepend 'mkfifo {}; ('
5477 # Append ') & _PID=$!; cat > {}; wait $_PID; _EXIT=$?;(rm {};exit $_EXIT)'
5478 $command =
5479 $self->{'commandline'}->replace_placeholders(["mkfifo \257<\257>; ("], 0, 0).
5480 $command.
5481 $self->{'commandline'}->replace_placeholders([") & _PID=\$!; cat > \257<\257>; ",
5482 "wait \$_PID; _EXIT=\$?; ",
5483 "rm \257<\257>; exit \$_EXIT"],
5484 0,0);
5485 }
5486 # Wrap with ssh + tranferring of files
5487 $command = $self->sshlogin_wrap($command);
5488 if(@Global::cat_partials) {
5489 # Prepend:
5490 # < /tmp/foo perl -e 'while(@ARGV) { sysseek(STDIN,shift,0) || die; $left = shift; while($read = sysread(STDIN,$buf, ($left > 32768 ? 32768 : $left))){ $left -= $read; syswrite(STDOUT,$buf); } }' 0 0 0 11 |
5491 $command = (shift @Global::cat_partials). "|". "(". $command. ")";
5492 } elsif($opt::pipe) {
5493 # Prepend EOF-detector to avoid starting $command if EOF.
5494 # The $tmpfile might exist if run on a remote system - we accept that risk
5495 my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".chr");
5496 # Unlink to avoid leaving files if --dry-run or --sshlogin
5497 unlink $tmpfile;
5498 $command =
5499 # Exit value:
5500 # empty input = true
5501 # some input = exit val from command
5502 qq{ sh -c 'dd bs=1 count=1 of=$tmpfile 2>/dev/null'; }.
5503 qq{ test \! -s "$tmpfile" && rm -f "$tmpfile" && exec true; }.
5504 qq{ (cat $tmpfile; rm $tmpfile; cat - ) | }.
5505 "($command);";
5506 }
5507 if($opt::tmux) {
5508 # Wrap command with 'tmux'
5509 $command = $self->tmux_wrap($command);
5510 }
5511 $self->{'wrapped'} = $command;
5512 }
5513 return $self->{'wrapped'};
5514 }
5515
5516 sub set_sshlogin {
5517 my $self = shift;
5518 my $sshlogin = shift;
5519 $self->{'sshlogin'} = $sshlogin;
5520 delete $self->{'sshlogin_wrap'}; # If sshlogin is changed the wrap is wrong
5521 delete $self->{'wrapped'};
5522 }
5523
5524 sub sshlogin {
5525 my $self = shift;
5526 return $self->{'sshlogin'};
5527 }
5528
5529 sub sshlogin_wrap {
5530 # Wrap the command with the commands needed to run remotely
5531 # Returns:
5532 # $self->{'sshlogin_wrap'} = command wrapped with ssh+transfer commands
5533 my $self = shift;
5534 my $command = shift;
5535 if(not defined $self->{'sshlogin_wrap'}) {
5536 my $sshlogin = $self->sshlogin();
5537 my $sshcmd = $sshlogin->sshcommand();
5538 my $serverlogin = $sshlogin->serverlogin();
5539 my ($pre,$post,$cleanup)=("","","");
5540
5541 if($serverlogin eq ":") {
5542 # No transfer neeeded
5543 $self->{'sshlogin_wrap'} = $command;
5544 } else {
5545 # --transfer
5546 $pre .= $self->sshtransfer();
5547 # --return
5548 $post .= $self->sshreturn();
5549 # --cleanup
5550 $post .= $self->sshcleanup();
5551 if($post) {
5552 # We need to save the exit status of the job
5553 $post = '_EXIT_status=$?; ' . $post . ' exit $_EXIT_status;';
5554 }
5555 # If the remote login shell is (t)csh then use 'setenv'
5556 # otherwise use 'export'
5557 # We cannot use parse_env_var(), as PARALLEL_SEQ changes
5558 # for each command
5559 my $parallel_env =
5560 ($Global::envwarn
5561 . q{ 'eval `echo $SHELL | grep "/t\\{0,1\\}csh" > /dev/null }
5562 . q{ && echo setenv PARALLEL_SEQ '$PARALLEL_SEQ'\; }
5563 . q{ setenv PARALLEL_PID '$PARALLEL_PID' }
5564 . q{ || echo PARALLEL_SEQ='$PARALLEL_SEQ'\;export PARALLEL_SEQ\; }
5565 . q{ PARALLEL_PID='$PARALLEL_PID'\;export PARALLEL_PID` ;' });
5566 my $remote_pre = "";
5567 my $ssh_options = "";
5568 if(($opt::pipe or $opt::pipepart) and $opt::ctrlc
5569 or
5570 not ($opt::pipe or $opt::pipepart) and not $opt::noctrlc) {
5571 # TODO Determine if this is needed
5572 # Propagating CTRL-C to kill remote jobs requires
5573 # remote jobs to be run with a terminal.
5574 $ssh_options = "-tt -oLogLevel=quiet";
5575 # $ssh_options = "";
5576 # tty - check if we have a tty.
5577 # stty:
5578 # -onlcr - make output 8-bit clean
5579 # isig - pass CTRL-C as signal
5580 # -echo - do not echo input
5581 $remote_pre .= ::shell_quote_scalar('tty >/dev/null && stty isig -onlcr -echo;');
5582 }
5583 if($opt::workdir) {
5584 my $wd = ::shell_quote_file($self->workdir());
5585 $remote_pre .= ::shell_quote_scalar("mkdir -p ") . $wd .
5586 ::shell_quote_scalar("; cd ") . $wd .
5587 # exit 255 (instead of exec false) would be the correct thing,
5588 # but that fails on tcsh
5589 ::shell_quote_scalar(qq{ || exec false;});
5590 }
5591 # This script is to solve the problem of
5592 # * not mixing STDERR and STDOUT
5593 # * terminating with ctrl-c
5594 # It works on Linux but not Solaris
5595 # Finishes on Solaris, but wrong exit code:
5596 # $SIG{CHLD} = sub {exit ($?&127 ? 128+($?&127) : 1+$?>>8)};
5597 # Hangs on Solaris, but correct exit code on Linux:
5598 # $SIG{CHLD} = sub { $done = 1 };
5599 # $p->poll;
5600 my $signal_script = "perl -e '".
5601 q{
5602 use IO::Poll;
5603 $SIG{CHLD} = sub { $done = 1 };
5604 $p = IO::Poll->new;
5605 $p->mask(STDOUT, POLLHUP);
5606 $pid=fork; unless($pid) {setpgrp; exec $ENV{SHELL}, "-c", @ARGV; die "exec: $!\n"}
5607 $p->poll;
5608 kill SIGHUP, -${pid} unless $done;
5609 wait; exit ($?&127 ? 128+($?&127) : 1+$?>>8)
5610 } . "' ";
5611 $signal_script =~ s/\s+/ /g;
5612
5613 $self->{'sshlogin_wrap'} =
5614 ($pre
5615 . "$sshcmd $ssh_options $serverlogin $parallel_env "
5616 . $remote_pre
5617 # . ::shell_quote_scalar($signal_script . ::shell_quote_scalar($command))
5618 . ::shell_quote_scalar($command)
5619 . ";"
5620 . $post);
5621 }
5622 }
5623 return $self->{'sshlogin_wrap'};
5624 }
5625
5626 sub transfer {
5627 # Files to transfer
5628 # Returns:
5629 # @transfer - File names of files to transfer
5630 my $self = shift;
5631 my @transfer = ();
5632 $self->{'transfersize'} = 0;
5633 if($opt::transfer) {
5634 for my $record (@{$self->{'commandline'}{'arg_list'}}) {
5635 # Merge arguments from records into args
5636 for my $arg (@$record) {
5637 CORE::push @transfer, $arg->orig();
5638 # filesize
5639 if(-e $arg->orig()) {
5640 $self->{'transfersize'} += (stat($arg->orig()))[7];
5641 }
5642 }
5643 }
5644 }
5645 return @transfer;
5646 }
5647
5648 sub transfersize {
5649 my $self = shift;
5650 return $self->{'transfersize'};
5651 }
5652
5653 sub sshtransfer {
5654 # Returns for each transfer file:
5655 # rsync $file remote:$workdir
5656 my $self = shift;
5657 my @pre;
5658 my $sshlogin = $self->sshlogin();
5659 my $workdir = $self->workdir();
5660 for my $file ($self->transfer()) {
5661 push @pre, $sshlogin->rsync_transfer_cmd($file,$workdir).";";
5662 }
5663 return join("",@pre);
5664 }
5665
5666 sub return {
5667 # Files to return
5668 # Non-quoted and with {...} substituted
5669 # Returns:
5670 # @non_quoted_filenames
5671 my $self = shift;
5672 return $self->{'commandline'}->
5673 replace_placeholders($self->{'commandline'}{'return_files'},0,0);
5674 }
5675
5676 sub returnsize {
5677 # This is called after the job has finished
5678 # Returns:
5679 # $number_of_bytes transferred in return
5680 my $self = shift;
5681 for my $file ($self->return()) {
5682 if(-e $file) {
5683 $self->{'returnsize'} += (stat($file))[7];
5684 }
5685 }
5686 return $self->{'returnsize'};
5687 }
5688
5689 sub sshreturn {
5690 # Returns for each return-file:
5691 # rsync remote:$workdir/$file .
5692 my $self = shift;
5693 my $sshlogin = $self->sshlogin();
5694 my $sshcmd = $sshlogin->sshcommand();
5695 my $serverlogin = $sshlogin->serverlogin();
5696 my $rsync_opt = "-rlDzR -e".::shell_quote_scalar($sshcmd);
5697 my $pre = "";
5698 for my $file ($self->return()) {
5699 $file =~ s:^\./::g; # Remove ./ if any
5700 my $relpath = ($file !~ m:^/:); # Is the path relative?
5701 my $cd = "";
5702 my $wd = "";
5703 if($relpath) {
5704 # rsync -avR /foo/./bar/baz.c remote:/tmp/
5705 # == (on old systems)
5706 # rsync -avR --rsync-path="cd /foo; rsync" remote:bar/baz.c /tmp/
5707 $wd = ::shell_quote_file($self->workdir()."/");
5708 }
5709 # Only load File::Basename if actually needed
5710 $Global::use{"File::Basename"} ||= eval "use File::Basename; 1;";
5711 # dir/./file means relative to dir, so remove dir on remote
5712 $file =~ m:(.*)/\./:;
5713 my $basedir = $1 ? ::shell_quote_file($1."/") : "";
5714 my $nobasedir = $file;
5715 $nobasedir =~ s:.*/\./::;
5716 $cd = ::shell_quote_file(::dirname($nobasedir));
5717 my $rsync_cd = '--rsync-path='.::shell_quote_scalar("cd $wd$cd; rsync");
5718 my $basename = ::shell_quote_scalar(::shell_quote_file(basename($file)));
5719 # --return
5720 # mkdir -p /home/tange/dir/subdir/;
5721 # rsync (--protocol 30) -rlDzR --rsync-path="cd /home/tange/dir/subdir/; rsync"
5722 # server:file.gz /home/tange/dir/subdir/
5723 $pre .= "mkdir -p $basedir$cd; ".$sshlogin->rsync()." $rsync_cd $rsync_opt $serverlogin:".
5724 $basename . " ".$basedir.$cd.";";
5725 }
5726 return $pre;
5727 }
5728
5729 sub sshcleanup {
5730 # Return the sshcommand needed to remove the file
5731 # Returns:
5732 # ssh command needed to remove files from sshlogin
5733 my $self = shift;
5734 my $sshlogin = $self->sshlogin();
5735 my $sshcmd = $sshlogin->sshcommand();
5736 my $serverlogin = $sshlogin->serverlogin();
5737 my $workdir = $self->workdir();
5738 my $cleancmd = "";
5739
5740 for my $file ($self->cleanup()) {
5741 my @subworkdirs = parentdirs_of($file);
5742 $cleancmd .= $sshlogin->cleanup_cmd($file,$workdir).";";
5743 }
5744 if(defined $opt::workdir and $opt::workdir eq "...") {
5745 $cleancmd .= "$sshcmd $serverlogin rm -rf " . ::shell_quote_scalar($workdir).';';
5746 }
5747 return $cleancmd;
5748 }
5749
5750 sub cleanup {
5751 # Returns:
5752 # Files to remove at cleanup
5753 my $self = shift;
5754 if($opt::cleanup) {
5755 my @transfer = $self->transfer();
5756 my @return = $self->return();
5757 return (@transfer,@return);
5758 } else {
5759 return ();
5760 }
5761 }
5762
5763 sub workdir {
5764 # Returns:
5765 # the workdir on a remote machine
5766 my $self = shift;
5767 if(not defined $self->{'workdir'}) {
5768 my $workdir;
5769 if(defined $opt::workdir) {
5770 if($opt::workdir eq ".") {
5771 # . means current dir
5772 my $home = $ENV{'HOME'};
5773 eval 'use Cwd';
5774 my $cwd = cwd();
5775 $workdir = $cwd;
5776 if($home) {
5777 # If homedir exists: remove the homedir from
5778 # workdir if cwd starts with homedir
5779 # E.g. /home/foo/my/dir => my/dir
5780 # E.g. /tmp/my/dir => /tmp/my/dir
5781 my ($home_dev, $home_ino) = (stat($home))[0,1];
5782 my $parent = "";
5783 my @dir_parts = split(m:/:,$cwd);
5784 my $part;
5785 while(defined ($part = shift @dir_parts)) {
5786 $part eq "" and next;
5787 $parent .= "/".$part;
5788 my ($parent_dev, $parent_ino) = (stat($parent))[0,1];
5789 if($parent_dev == $home_dev and $parent_ino == $home_ino) {
5790 # dev and ino is the same: We found the homedir.
5791 $workdir = join("/",@dir_parts);
5792 last;
5793 }
5794 }
5795 }
5796 if($workdir eq "") {
5797 $workdir = ".";
5798 }
5799 } elsif($opt::workdir eq "...") {
5800 $workdir = ".parallel/tmp/" . ::hostname() . "-" . $$
5801 . "-" . $self->seq();
5802 } else {
5803 $workdir = $opt::workdir;
5804 # Rsync treats /./ special. We dont want that
5805 $workdir =~ s:/\./:/:g; # Remove /./
5806 $workdir =~ s:/+$::; # Remove ending / if any
5807 $workdir =~ s:^\./::g; # Remove starting ./ if any
5808 }
5809 } else {
5810 $workdir = ".";
5811 }
5812 $self->{'workdir'} = ::shell_quote_scalar($workdir);
5813 }
5814 return $self->{'workdir'};
5815 }
5816
5817 sub parentdirs_of {
5818 # Return:
5819 # all parentdirs except . of this dir or file - sorted desc by length
5820 my $d = shift;
5821 my @parents = ();
5822 while($d =~ s:/[^/]+$::) {
5823 if($d ne ".") {
5824 push @parents, $d;
5825 }
5826 }
5827 return @parents;
5828 }
5829
5830 sub start {
5831 # Setup STDOUT and STDERR for a job and start it.
5832 # Returns:
5833 # job-object or undef if job not to run
5834 my $job = shift;
5835 # Get the shell command to be executed (possibly with ssh infront).
5836 my $command = $job->wrapped();
5837
5838 if($Global::interactive or $Global::stderr_verbose) {
5839 if($Global::interactive) {
5840 print $Global::original_stderr "$command ?...";
5841 open(my $tty_fh, "<", "/dev/tty") || ::die_bug("interactive-tty");
5842 my $answer = <$tty_fh>;
5843 close $tty_fh;
5844 my $run_yes = ($answer =~ /^\s*y/i);
5845 if (not $run_yes) {
5846 $command = "true"; # Run the command 'true'
5847 }
5848 } else {
5849 print $Global::original_stderr "$command\n";
5850 }
5851 }
5852
5853 my $pid;
5854 $job->openoutputfiles();
5855 my($stdout_fh,$stderr_fh) = ($job->fh(1,"w"),$job->fh(2,"w"));
5856 local (*IN,*OUT,*ERR);
5857 open OUT, '>&', $stdout_fh or ::die_bug("Can't redirect STDOUT: $!");
5858 open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDOUT: $!");
5859
5860 if(($opt::dryrun or $Global::verbose) and $opt::ungroup) {
5861 if($Global::verbose <= 1) {
5862 print $stdout_fh $job->replaced(),"\n";
5863 } else {
5864 # Verbose level > 1: Print the rsync and stuff
5865 print $stdout_fh $command,"\n";
5866 }
5867 }
5868 if($opt::dryrun) {
5869 $command = "true";
5870 }
5871 $ENV{'PARALLEL_SEQ'} = $job->seq();
5872 $ENV{'PARALLEL_PID'} = $$;
5873 ::debug("run", $Global::total_running, " processes . Starting (",
5874 $job->seq(), "): $command\n");
5875 if($opt::pipe) {
5876 my ($stdin_fh);
5877 # The eval is needed to catch exception from open3
5878 eval {
5879 $pid = ::open3($stdin_fh, ">&OUT", ">&ERR", $Global::shell, "-c", $command) ||
5880 ::die_bug("open3-pipe");
5881 1;
5882 };
5883 $job->set_fh(0,"w",$stdin_fh);
5884 } elsif(@opt::a and not $Global::stdin_in_opt_a and $job->seq() == 1
5885 and $job->sshlogin()->string() eq ":") {
5886 # Give STDIN to the first job if using -a (but only if running
5887 # locally - otherwise CTRL-C does not work for other jobs Bug#36585)
5888 *IN = *STDIN;
5889 # The eval is needed to catch exception from open3
5890 eval {
5891 $pid = ::open3("<&IN", ">&OUT", ">&ERR", $Global::shell, "-c", $command) ||
5892 ::die_bug("open3-a");
5893 1;
5894 };
5895 # Re-open to avoid complaining
5896 open(STDIN, "<&", $Global::original_stdin)
5897 or ::die_bug("dup-\$Global::original_stdin: $!");
5898 } elsif ($opt::tty and not $Global::tty_taken and -c "/dev/tty" and
5899 open(my $devtty_fh, "<", "/dev/tty")) {
5900 # Give /dev/tty to the command if no one else is using it
5901 *IN = $devtty_fh;
5902 # The eval is needed to catch exception from open3
5903 eval {
5904 $pid = ::open3("<&IN", ">&OUT", ">&ERR", $Global::shell, "-c", $command) ||
5905 ::die_bug("open3-/dev/tty");
5906 $Global::tty_taken = $pid;
5907 close $devtty_fh;
5908 1;
5909 };
5910 } else {
5911 # The eval is needed to catch exception from open3
5912 eval {
5913 $pid = ::open3(::gensym, ">&OUT", ">&ERR", $Global::shell, "-c", $command) ||
5914 ::die_bug("open3-gensym");
5915 1;
5916 };
5917 }
5918 if($pid) {
5919 # A job was started
5920 $Global::total_running++;
5921 $Global::total_started++;
5922 $job->set_pid($pid);
5923 $job->set_starttime();
5924 $Global::running{$job->pid()} = $job;
5925 if($opt::timeout) {
5926 $Global::timeoutq->insert($job);
5927 }
5928 $Global::newest_job = $job;
5929 $Global::newest_starttime = ::now();
5930 return $job;
5931 } else {
5932 # No more processes
5933 ::debug("run", "Cannot spawn more jobs.\n");
5934 return undef;
5935 }
5936 }
5937
5938 sub tmux_wrap {
5939 # Wrap command with tmux for session pPID
5940 # Input:
5941 # $actual_command = the actual command being run (incl ssh wrap)
5942 my $self = shift;
5943 my $actual_command = shift;
5944 # Temporary file name. Used for fifo to communicate exit val
5945 my ($fh, $tmpfile) = ::tmpfile(SUFFIX => ".tmx");
5946 $Global::unlink{$tmpfile}=1;
5947 close $fh;
5948 unlink $tmpfile;
5949 my $visual_command = $self->replaced();
5950 my $title = $visual_command;
5951 # ; causes problems
5952 # ascii 194-245 annoys tmux
5953 $title =~ tr/[\011-\016;\302-\365]//d;
5954
5955 my $tmux;
5956 if($Global::total_running == 0) {
5957 $tmux = "tmux new-session -s p$$ -d -n ".
5958 ::shell_quote_scalar($title);
5959 print $Global::original_stderr "See output with: tmux attach -t p$$\n";
5960 } else {
5961 $tmux = "tmux new-window -t p$$ -n ".::shell_quote_scalar($title);
5962 }
5963 return "mkfifo $tmpfile; $tmux ".
5964 # Run in tmux
5965 ::shell_quote_scalar(
5966 "(".$actual_command.');(echo $?$status;echo 255) >'.$tmpfile."&".
5967 "echo ".::shell_quote_scalar($visual_command).";".
5968 "echo \007Job finished at: `date`;sleep 10").
5969 # Run outside tmux
5970 # Read the first line from the fifo and use that as status code
5971 "; exit `perl -ne 'unlink \$ARGV; 1..1 and print' $tmpfile` ";
5972 }
5973
5974 sub is_already_in_results {
5975 # Do we already have results for this job?
5976 # Returns:
5977 # $job_already_run = bool whether there is output for this or not
5978 my $job = $_[0];
5979 my $args_as_dirname = $job->{'commandline'}->args_as_dirname();
5980 # prefix/name1/val1/name2/val2/
5981 my $dir = $opt::results."/".$args_as_dirname;
5982 ::debug("run", "Test $dir/stdout", -e "$dir/stdout", "\n");
5983 return -e "$dir/stdout";
5984 }
5985
5986 sub is_already_in_joblog {
5987 my $job = shift;
5988 return vec($Global::job_already_run,$job->seq(),1);
5989 }
5990
5991 sub set_job_in_joblog {
5992 my $job = shift;
5993 vec($Global::job_already_run,$job->seq(),1) = 1;
5994 }
5995
5996 sub should_be_retried {
5997 # Should this job be retried?
5998 # Returns
5999 # 0 - do not retry
6000 # 1 - job queued for retry
6001 my $self = shift;
6002 if (not $opt::retries) {
6003 return 0;
6004 }
6005 if(not $self->exitstatus()) {
6006 # Completed with success. If there is a recorded failure: forget it
6007 $self->reset_failed_here();
6008 return 0
6009 } else {
6010 # The job failed. Should it be retried?
6011 $self->add_failed_here();
6012 if($self->total_failed() == $opt::retries) {
6013 # This has been retried enough
6014 return 0;
6015 } else {
6016 # This command should be retried
6017 $self->set_endtime(undef);
6018 $Global::JobQueue->unget($self);
6019 ::debug("run", "Retry ", $self->seq(), "\n");
6020 return 1;
6021 }
6022 }
6023 }
6024
6025 sub print {
6026 # Print the output of the jobs
6027 # Returns: N/A
6028
6029 my $self = shift;
6030 ::debug("print", ">>joboutput ", $self->replaced(), "\n");
6031 if($opt::dryrun) {
6032 # Nothing was printed to this job:
6033 # cleanup tmp files if --files was set
6034 unlink $self->fh(1,"name");
6035 }
6036 if($opt::pipe and $self->virgin()) {
6037 # Skip --joblog, --dryrun, --verbose
6038 } else {
6039 if($Global::joblog and defined $self->{'exitstatus'}) {
6040 # Add to joblog when finished
6041 $self->print_joblog();
6042 }
6043
6044 # Printing is only relevant for grouped/--line-buffer output.
6045 $opt::ungroup and return;
6046 # Check for disk full
6047 exit_if_disk_full();
6048
6049 if(($opt::dryrun or $Global::verbose)
6050 and
6051 not $self->{'verbose_printed'}) {
6052 $self->{'verbose_printed'}++;
6053 if($Global::verbose <= 1) {
6054 print STDOUT $self->replaced(),"\n";
6055 } else {
6056 # Verbose level > 1: Print the rsync and stuff
6057 print STDOUT $self->wrapped(),"\n";
6058 }
6059 # If STDOUT and STDERR are merged,
6060 # we want the command to be printed first
6061 # so flush to avoid STDOUT being buffered
6062 flush STDOUT;
6063 }
6064 }
6065 for my $fdno (sort { $a <=> $b } keys %Global::fd) {
6066 # Sort by file descriptor numerically: 1,2,3,..,9,10,11
6067 $fdno == 0 and next;
6068 my $out_fd = $Global::fd{$fdno};
6069 my $in_fh = $self->fh($fdno,"r");
6070 if(not $in_fh) {
6071 if(not $Job::file_descriptor_warning_printed{$fdno}++) {
6072 # ::warning("File descriptor $fdno not defined\n");
6073 }
6074 next;
6075 }
6076 ::debug("print", "File descriptor $fdno (", $self->fh($fdno,"name"), "):");
6077 if($opt::files) {
6078 # If --compress: $in_fh must be closed first.
6079 close $self->fh($fdno,"w");
6080 close $in_fh;
6081 if($opt::pipe and $self->virgin()) {
6082 # Nothing was printed to this job:
6083 # cleanup unused tmp files if --files was set
6084 for my $fdno (1,2) {
6085 unlink $self->fh($fdno,"name");
6086 unlink $self->fh($fdno,"unlink");
6087 }
6088 } elsif($fdno == 1 and $self->fh($fdno,"name")) {
6089 print $out_fd $self->fh($fdno,"name"),"\n";
6090 }
6091 } elsif($opt::linebuffer) {
6092 # Line buffered print out
6093 $self->linebuffer_print($fdno,$in_fh,$out_fd);
6094 } else {
6095 my $buf;
6096 close $self->fh($fdno,"w");
6097 seek $in_fh, 0, 0;
6098 # $in_fh is now ready for reading at position 0
6099 if($opt::tag or defined $opt::tagstring) {
6100 my $tag = $self->tag();
6101 if($fdno == 2) {
6102 # OpenSSH_3.6.1p2 gives 'tcgetattr: Invalid argument' with -tt
6103 # This is a crappy way of ignoring it.
6104 while(<$in_fh>) {
6105 if(/^(client_process_control: )?tcgetattr: Invalid argument\n/) {
6106 # Skip
6107 } else {
6108 print $out_fd $tag,$_;
6109 }
6110 # At most run the loop once
6111 last;
6112 }
6113 }
6114 while(<$in_fh>) {
6115 print $out_fd $tag,$_;
6116 }
6117 } else {
6118 my $buf;
6119 if($fdno == 2) {
6120 # OpenSSH_3.6.1p2 gives 'tcgetattr: Invalid argument' with -tt
6121 # This is a crappy way of ignoring it.
6122 sysread($in_fh,$buf,1_000);
6123 $buf =~ s/^(client_process_control: )?tcgetattr: Invalid argument\n//;
6124 print $out_fd $buf;
6125 }
6126 while(sysread($in_fh,$buf,32768)) {
6127 print $out_fd $buf;
6128 }
6129 }
6130 close $in_fh;
6131 }
6132 flush $out_fd;
6133 }
6134 ::debug("print", "<<joboutput @command\n");
6135 }
6136
6137 sub linebuffer_print {
6138 my $self = shift;
6139 my ($fdno,$in_fh,$out_fd) = @_;
6140 my $partial = \$self->{'partial_line',$fdno};
6141
6142 if(defined $self->{'exitstatus'}) {
6143 # If the job is dead: close printing fh. Needed for --compress
6144 close $self->fh($fdno,"w");
6145 if($opt::compress) {
6146 # Blocked reading in final round
6147 $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;";
6148 for my $fdno (1,2) {
6149 my $fdr = $self->fh($fdno,'r');
6150 my $flags;
6151 fcntl($fdr, &F_GETFL, $flags) || die $!; # Get the current flags on the filehandle
6152 $flags &= ~&O_NONBLOCK; # Remove non-blocking to the flags
6153 fcntl($fdr, &F_SETFL, $flags) || die $!; # Set the flags on the filehandle
6154 }
6155 }
6156 }
6157 # This seek will clear EOF
6158 seek $in_fh, tell($in_fh), 0;
6159 # The read is non-blocking: The $in_fh is set to non-blocking.
6160 # 32768 --tag = 5.1s
6161 # 327680 --tag = 4.4s
6162 # 1024000 --tag = 4.4s
6163 # 3276800 --tag = 4.3s
6164 # 32768000 --tag = 4.7s
6165 # 10240000 --tag = 4.3s
6166 while(read($in_fh,substr($$partial,length $$partial),3276800)) {
6167 # Append to $$partial
6168 # Find the last \n
6169 my $i = rindex($$partial,"\n");
6170 if($i != -1) {
6171 # One or more complete lines were found
6172 if($fdno == 2 and not $self->{'printed_first_line',$fdno}++) {
6173 # OpenSSH_3.6.1p2 gives 'tcgetattr: Invalid argument' with -tt
6174 # This is a crappy way of ignoring it.
6175 $$partial =~ s/^(client_process_control: )?tcgetattr: Invalid argument\n//;
6176 # Length of partial line has changed: Find the last \n again
6177 $i = rindex($$partial,"\n");
6178 }
6179 if($opt::tag or defined $opt::tagstring) {
6180 # Replace ^ with $tag within the full line
6181 my $tag = $self->tag();
6182 substr($$partial,0,$i+1) =~ s/^/$tag/gm;
6183 # Length of partial line has changed: Find the last \n again
6184 $i = rindex($$partial,"\n");
6185 }
6186 # Print up to and including the last \n
6187 print $out_fd substr($$partial,0,$i+1);
6188 # Remove the printed part
6189 substr($$partial,0,$i+1)="";
6190 }
6191 }
6192 if(defined $self->{'exitstatus'}) {
6193 # If the job is dead: print the remaining partial line
6194 # read remaining
6195 if($$partial and ($opt::tag or defined $opt::tagstring)) {
6196 my $tag = $self->tag();
6197 $$partial =~ s/^/$tag/gm;
6198 }
6199 print $out_fd $$partial;
6200 # Release the memory
6201 $$partial = undef;
6202 if($self->fh($fdno,"rpid") and CORE::kill 0, $self->fh($fdno,"rpid")) {
6203 # decompress still running
6204 } else {
6205 # decompress done: close fh
6206 close $in_fh;
6207 }
6208 }
6209 }
6210
6211 sub print_joblog {
6212 my $self = shift;
6213 my $cmd;
6214 if($Global::verbose <= 1) {
6215 $cmd = $self->replaced();
6216 } else {
6217 # Verbose level > 1: Print the rsync and stuff
6218 $cmd = "@command";
6219 }
6220 print $Global::joblog
6221 join("\t", $self->seq(), $self->sshlogin()->string(),
6222 $self->starttime(), sprintf("%10.3f",$self->runtime()),
6223 $self->transfersize(), $self->returnsize(),
6224 $self->exitstatus(), $self->exitsignal(), $cmd
6225 ). "\n";
6226 flush $Global::joblog;
6227 $self->set_job_in_joblog();
6228 }
6229
6230 sub tag {
6231 my $self = shift;
6232 if(not defined $self->{'tag'}) {
6233 $self->{'tag'} = $self->{'commandline'}->
6234 replace_placeholders([$opt::tagstring],0,0)."\t";
6235 }
6236 return $self->{'tag'};
6237 }
6238
6239 sub hostgroups {
6240 my $self = shift;
6241 if(not defined $self->{'hostgroups'}) {
6242 $self->{'hostgroups'} = $self->{'commandline'}->{'arg_list'}[0][0]->{'hostgroups'};
6243 }
6244 return @{$self->{'hostgroups'}};
6245 }
6246
6247 sub exitstatus {
6248 my $self = shift;
6249 return $self->{'exitstatus'};
6250 }
6251
6252 sub set_exitstatus {
6253 my $self = shift;
6254 my $exitstatus = shift;
6255 if($exitstatus) {
6256 # Overwrite status if non-zero
6257 $self->{'exitstatus'} = $exitstatus;
6258 } else {
6259 # Set status but do not overwrite
6260 # Status may have been set by --timeout
6261 $self->{'exitstatus'} ||= $exitstatus;
6262 }
6263 }
6264
6265 sub exitsignal {
6266 my $self = shift;
6267 return $self->{'exitsignal'};
6268 }
6269
6270 sub set_exitsignal {
6271 my $self = shift;
6272 my $exitsignal = shift;
6273 $self->{'exitsignal'} = $exitsignal;
6274 }
6275
6276 {
6277 my ($disk_full_fh, $b8193, $name);
6278 sub exit_if_disk_full {
6279 # Checks if $TMPDIR is full by writing 8kb to a tmpfile
6280 # If the disk is full: Exit immediately.
6281 # Returns:
6282 # N/A
6283 if(not $disk_full_fh) {
6284 ($disk_full_fh, $name) = ::tmpfile(SUFFIX => ".df");
6285 unlink $name;
6286 $b8193 = "x"x8193;
6287 }
6288 # Linux does not discover if a disk is full if writing <= 8192
6289 # Tested on:
6290 # bfs btrfs cramfs ext2 ext3 ext4 ext4dev jffs2 jfs minix msdos
6291 # ntfs reiserfs tmpfs ubifs vfat xfs
6292 # TODO this should be tested on different OS similar to this:
6293 #
6294 # doit() {
6295 # sudo mount /dev/ram0 /mnt/loop; sudo chmod 1777 /mnt/loop
6296 # seq 100000 | parallel --tmpdir /mnt/loop/ true &
6297 # seq 6900000 > /mnt/loop/i && echo seq OK
6298 # seq 6980868 > /mnt/loop/i
6299 # seq 10000 > /mnt/loop/ii
6300 # sleep 3
6301 # sudo umount /mnt/loop/ || sudo umount -l /mnt/loop/
6302 # echo >&2
6303 # }
6304 print $disk_full_fh $b8193;
6305 if(not $disk_full_fh
6306 or
6307 tell $disk_full_fh == 0) {
6308 ::error("Output is incomplete. Cannot append to buffer file in $ENV{'TMPDIR'}. Is the disk full?\n");
6309 ::error("Change \$TMPDIR with --tmpdir or use --compress.\n");
6310 ::wait_and_exit(255);
6311 }
6312 truncate $disk_full_fh, 0;
6313 seek($disk_full_fh, 0, 0) || die;
6314 }
6315 }
6316
6317
6318 package CommandLine;
6319
6320 sub new {
6321 my $class = shift;
6322 my $seq = shift;
6323 my $commandref = shift;
6324 $commandref || die;
6325 my $arg_queue = shift;
6326 my $context_replace = shift;
6327 my $max_number_of_args = shift; # for -N and normal (-n1)
6328 my $return_files = shift;
6329 my $replacecount_ref = shift;
6330 my $len_ref = shift;
6331 my %replacecount = %$replacecount_ref;
6332 my %len = %$len_ref;
6333 for (keys %$replacecount_ref) {
6334 # Total length of this replacement string {} replaced with all args
6335 $len{$_} = 0;
6336 }
6337 return bless {
6338 'command' => $commandref,
6339 'seq' => $seq,
6340 'len' => \%len,
6341 'arg_list' => [],
6342 'arg_queue' => $arg_queue,
6343 'max_number_of_args' => $max_number_of_args,
6344 'replacecount' => \%replacecount,
6345 'context_replace' => $context_replace,
6346 'return_files' => $return_files,
6347 'replaced' => undef,
6348 }, ref($class) || $class;
6349 }
6350
6351 sub seq {
6352 my $self = shift;
6353 return $self->{'seq'};
6354 }
6355
6356 {
6357 my $max_slot_number;
6358
6359 sub slot {
6360 # Find the number of a free job slot and return it
6361 # Uses:
6362 # @Global::slots
6363 # Returns:
6364 # $jobslot = number of jobslot
6365 my $self = shift;
6366 if(not $self->{'slot'}) {
6367 if(not @Global::slots) {
6368 # $Global::max_slot_number will typically be $Global::max_jobs_running
6369 push @Global::slots, ++$max_slot_number;
6370 }
6371 $self->{'slot'} = shift @Global::slots;
6372 }
6373 return $self->{'slot'};
6374 }
6375 }
6376
6377 sub populate {
6378 # Add arguments from arg_queue until the number of arguments or
6379 # max line length is reached
6380 # Uses:
6381 # $Global::minimal_command_line_length
6382 # $opt::cat
6383 # $opt::fifo
6384 # $Global::JobQueue
6385 # $opt::m
6386 # $opt::X
6387 # $CommandLine::already_spread
6388 # $Global::max_jobs_running
6389 # Returns: N/A
6390 my $self = shift;
6391 my $next_arg;
6392 my $max_len = $Global::minimal_command_line_length || Limits::Command::max_length();
6393
6394 if($opt::cat or $opt::fifo) {
6395 # Generate a tempfile name that will be used as {}
6396 my($outfh,$name) = ::tmpfile(SUFFIX => ".pip");
6397 close $outfh;
6398 # Unlink is needed if: ssh otheruser@localhost
6399 unlink $name;
6400 $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->unget([Arg->new($name)]);
6401 }
6402
6403 while (not $self->{'arg_queue'}->empty()) {
6404 $next_arg = $self->{'arg_queue'}->get();
6405 if(not defined $next_arg) {
6406 next;
6407 }
6408 $self->push($next_arg);
6409 if($self->len() >= $max_len) {
6410 # Command length is now > max_length
6411 # If there are arguments: remove the last
6412 # If there are no arguments: Error
6413 # TODO stuff about -x opt_x
6414 if($self->number_of_args() > 1) {
6415 # There is something to work on
6416 $self->{'arg_queue'}->unget($self->pop());
6417 last;
6418 } else {
6419 my $args = join(" ", map { $_->orig() } @$next_arg);
6420 ::error("Command line too long (",
6421 $self->len(), " >= ",
6422 $max_len,
6423 ") at number ",
6424 $self->{'arg_queue'}->arg_number(),
6425 ": ".
6426 (substr($args,0,50))."...\n");
6427 $self->{'arg_queue'}->unget($self->pop());
6428 ::wait_and_exit(255);
6429 }
6430 }
6431
6432 if(defined $self->{'max_number_of_args'}) {
6433 if($self->number_of_args() >= $self->{'max_number_of_args'}) {
6434 last;
6435 }
6436 }
6437 }
6438 if(($opt::m or $opt::X) and not $CommandLine::already_spread
6439 and $self->{'arg_queue'}->empty() and $Global::max_jobs_running) {
6440 # -m or -X and EOF => Spread the arguments over all jobslots
6441 # (unless they are already spread)
6442 $CommandLine::already_spread ||= 1;
6443 if($self->number_of_args() > 1) {
6444 $self->{'max_number_of_args'} =
6445 ::ceil($self->number_of_args()/$Global::max_jobs_running);
6446 $Global::JobQueue->{'commandlinequeue'}->{'max_number_of_args'} =
6447 $self->{'max_number_of_args'};
6448 $self->{'arg_queue'}->unget($self->pop_all());
6449 while($self->number_of_args() < $self->{'max_number_of_args'}) {
6450 $self->push($self->{'arg_queue'}->get());
6451 }
6452 }
6453 }
6454 }
6455
6456 sub push {
6457 # Add one or more records as arguments
6458 # Returns: N/A
6459 my $self = shift;
6460 my $record = shift;
6461 push @{$self->{'arg_list'}}, $record;
6462
6463 my $quote_arg = $Global::noquote ? 0 : not $Global::quoting;
6464 my $rep;
6465 for my $arg (@$record) {
6466 if(defined $arg) {
6467 for my $perlexpr (keys %{$self->{'replacecount'}}) {
6468 # 50% faster than below
6469 $self->{'len'}{$perlexpr} += length $arg->replace($perlexpr,$quote_arg,$self);
6470 # $rep = $arg->replace($perlexpr,$quote_arg,$self);
6471 # $self->{'len'}{$perlexpr} += length $rep;
6472 # ::debug("length", "Length: ", length $rep,
6473 # "(", $perlexpr, "=>", $rep, ")\n");
6474 }
6475 }
6476 }
6477 }
6478
6479 sub pop {
6480 # Remove last argument
6481 # Returns:
6482 # the last record
6483 my $self = shift;
6484 my $record = pop @{$self->{'arg_list'}};
6485 my $quote_arg = $Global::noquote ? 0 : not $Global::quoting;
6486 for my $arg (@$record) {
6487 if(defined $arg) {
6488 for my $perlexpr (keys %{$self->{'replacecount'}}) {
6489 $self->{'len'}{$perlexpr} -=
6490 length $arg->replace($perlexpr,$quote_arg,$self);
6491 }
6492 }
6493 }
6494 return $record;
6495 }
6496
6497 sub pop_all {
6498 # Remove all arguments and zeros the length of replacement strings
6499 # Returns:
6500 # all records
6501 my $self = shift;
6502 my @popped = @{$self->{'arg_list'}};
6503 for my $replacement_string (keys %{$self->{'replacecount'}}) {
6504 $self->{'len'}{$replacement_string} = 0;
6505 }
6506 $self->{'arg_list'} = [];
6507 return @popped;
6508 }
6509
6510 sub number_of_args {
6511 # The number of records
6512 # Returns:
6513 # number of records
6514 my $self = shift;
6515 # Ftq rudef oaawuq ime dqxqmeqp az 2011-01-24 mzp ime iaz nk MQhmd
6516 # Mdzrvadp Nvmdymeaz az 2011-04-10. Ftue oaawuq dqxqmeqp az
6517 # 2013-08-18 ue m nuf tmdpqd me kag tmhq fa geq daf14. Bxqmeq
6518 # qymux oaawuq@fmzsq.pw itqz kag dqmp ftue.
6519 #
6520 # U my ftq ymuzfmuzqd ar m buqoq ar rdqq earfimdq omxxqp SZG
6521 # Bmdmxxqx. Rdqq earfimdq sgmdmzfqqe kag mooqee fa ftq eagdoq
6522 # oapq, ngf U tmhq nqqz iazpqduzs tai ymzk mofgmxxk _dqmp_ ftq
6523 # eagdoq oapq.
6524 #
6525 # Fa fqef ftue U bgf uz m oayyqzf fqxxuzs bqabxq fa qymux yq itqz
6526 # ftqk dqmp ftue. Ftq oayyqzf ime bgf uz m eqofuaz ar ftq oapq
6527 # ftmf za azq iagxp xaaw fa ruj ad uybdahq ftq earfimdq - ea ftq
6528 # eagdoq oapq qcguhmxqzf fa m pgefk oadzqd. Fa ymwq egdq ftq
6529 # oayyqzf iagxp zaf etai gb ur eayq azq vgef sdqbbqp ftdagst ftq
6530 # eagdoq oapq U daf13'qp ftq eagdoq oapq
6531 # tffb://qz.iuwubqpum.ads/iuwu/DAF13
6532 #
6533 # 2.5 yazfte xmfqd U dqoquhqp mz qymux rday eayqazq ita zaf azxk
6534 # ymzmsqp fa ruzp ftq oayyqzf, ngf mxea ymzmsqp fa sgqee ftq oapq
6535 # tmp fa nq daf13'qp.
6536 #
6537 # Ftue nduzse yq fa ftq oazoxgeuaz ftmf ftqdq _mdq_ bqabxq, ita
6538 # mdq zaf mrruxumfqp iuft ftq bdavqof, ftmf iuxx dqmp ftq eagdoq
6539 # oapq - ftagst uf ymk zaf tmbbqz hqdk arfqz.
6540 #
6541 # This is really the number of records
6542 return $#{$self->{'arg_list'}}+1;
6543 }
6544
6545 sub number_of_recargs {
6546 # The number of args in records
6547 # Returns:
6548 # number of args records
6549 my $self = shift;
6550 my $sum = 0;
6551 my $nrec = scalar @{$self->{'arg_list'}};
6552 if($nrec) {
6553 $sum = $nrec * (scalar @{$self->{'arg_list'}[0]});
6554 }
6555 return $sum;
6556 }
6557
6558 sub args_as_string {
6559 # Returns:
6560 # all unmodified arguments joined with ' ' (similar to {})
6561 my $self = shift;
6562 return (join " ", map { $_->orig() }
6563 map { @$_ } @{$self->{'arg_list'}});
6564 }
6565
6566 sub args_as_dirname {
6567 # Returns:
6568 # all unmodified arguments joined with '/' (similar to {})
6569 # \t \0 \\ and / are quoted as: \t \0 \\ \_
6570 # If $Global::max_file_length: Keep subdirs < $Global::max_file_length
6571 my $self = shift;
6572 my @res = ();
6573
6574 for my $rec_ref (@{$self->{'arg_list'}}) {
6575 # If headers are used, sort by them.
6576 # Otherwise keep the order from the command line.
6577 my @header_indexes_sorted = header_indexes_sorted($#$rec_ref+1);
6578 for my $n (@header_indexes_sorted) {
6579 CORE::push(@res,
6580 $Global::input_source_header{$n},
6581 map { my $s = $_;
6582 # \t \0 \\ and / are quoted as: \t \0 \\ \_
6583 $s =~ s/\\/\\\\/g;
6584 $s =~ s/\t/\\t/g;
6585 $s =~ s/\0/\\0/g;
6586 $s =~ s:/:\\_:g;
6587 if($Global::max_file_length) {
6588 # Keep each subdir shorter than the longest
6589 # allowed file name
6590 $s = substr($s,0,$Global::max_file_length);
6591 }
6592 $s; }
6593 $rec_ref->[$n-1]->orig());
6594 }
6595 }
6596 return join "/", @res;
6597 }
6598
6599 sub header_indexes_sorted {
6600 # Sort headers first by number then by name.
6601 # E.g.: 1a 1b 11a 11b
6602 # Returns:
6603 # Indexes of %Global::input_source_header sorted
6604 my $max_col = shift;
6605
6606 no warnings 'numeric';
6607 for my $col (1 .. $max_col) {
6608 # Make sure the header is defined. If it is not: use column number
6609 if(not defined $Global::input_source_header{$col}) {
6610 $Global::input_source_header{$col} = $col;
6611 }
6612 }
6613 my @header_indexes_sorted = sort {
6614 # Sort headers numerically then asciibetically
6615 $Global::input_source_header{$a} <=> $Global::input_source_header{$b}
6616 or
6617 $Global::input_source_header{$a} cmp $Global::input_source_header{$b}
6618 } 1 .. $max_col;
6619 return @header_indexes_sorted;
6620 }
6621
6622 sub len {
6623 # Uses:
6624 # $opt::shellquote
6625 # The length of the command line with args substituted
6626 my $self = shift;
6627 my $len = 0;
6628 # Add length of the original command with no args
6629 # Length of command w/ all replacement args removed
6630 $len += $self->{'len'}{'noncontext'} + @{$self->{'command'}} -1;
6631 ::debug("length", "noncontext + command: $len\n");
6632 my $recargs = $self->number_of_recargs();
6633 if($self->{'context_replace'}) {
6634 # Context is duplicated for each arg
6635 $len += $recargs * $self->{'len'}{'context'};
6636 for my $replstring (keys %{$self->{'replacecount'}}) {
6637 # If the replacements string is more than once: mulitply its length
6638 $len += $self->{'len'}{$replstring} *
6639 $self->{'replacecount'}{$replstring};
6640 ::debug("length", $replstring, " ", $self->{'len'}{$replstring}, "*",
6641 $self->{'replacecount'}{$replstring}, "\n");
6642 }
6643 # echo 11 22 33 44 55 66 77 88 99 1010
6644 # echo 1 2 3 4 5 6 7 8 9 10 1 2 3 4 5 6 7 8 9 10
6645 # 5 + ctxgrp*arg
6646 ::debug("length", "Ctxgrp: ", $self->{'len'}{'contextgroups'},
6647 " Groups: ", $self->{'len'}{'noncontextgroups'}, "\n");
6648 # Add space between context groups
6649 $len += ($recargs-1) * ($self->{'len'}{'contextgroups'});
6650 } else {
6651 # Each replacement string may occur several times
6652 # Add the length for each time
6653 $len += 1*$self->{'len'}{'context'};
6654 ::debug("length", "context+noncontext + command: $len\n");
6655 for my $replstring (keys %{$self->{'replacecount'}}) {
6656 # (space between regargs + length of replacement)
6657 # * number this replacement is used
6658 $len += ($recargs -1 + $self->{'len'}{$replstring}) *
6659 $self->{'replacecount'}{$replstring};
6660 }
6661 }
6662 if($opt::nice) {
6663 # Pessimistic length if --nice is set
6664 # Worse than worst case: every char needs to be quoted with \
6665 $len *= 2;
6666 }
6667 if($Global::quoting) {
6668 # Pessimistic length if -q is set
6669 # Worse than worst case: every char needs to be quoted with \
6670 $len *= 2;
6671 }
6672 if($opt::shellquote) {
6673 # Pessimistic length if --shellquote is set
6674 # Worse than worst case: every char needs to be quoted with \ twice
6675 $len *= 4;
6676 }
6677 # If we are using --env, add the prefix for that, too.
6678 $len += $Global::envvarlen;
6679
6680 return $len;
6681 }
6682
6683 sub replaced {
6684 # Uses:
6685 # $Global::noquote
6686 # $Global::quoting
6687 # Returns:
6688 # $replaced = command with place holders replaced and prepended
6689 my $self = shift;
6690 if(not defined $self->{'replaced'}) {
6691 # Don't quote arguments if the input is the full command line
6692 my $quote_arg = $Global::noquote ? 0 : not $Global::quoting;
6693 $self->{'replaced'} = $self->replace_placeholders($self->{'command'},$Global::quoting,$quote_arg);
6694 my $len = length $self->{'replaced'};
6695 if ($len != $self->len()) {
6696 ::debug("length", $len, " != ", $self->len(), " ", $self->{'replaced'}, "\n");
6697 } else {
6698 ::debug("length", $len, " == ", $self->len(), " ", $self->{'replaced'}, "\n");
6699 }
6700 }
6701 return $self->{'replaced'};
6702 }
6703
6704 sub replace_placeholders {
6705 # Replace foo{}bar with fooargbar
6706 # Input:
6707 # $targetref = command as shell words
6708 # $quote = should everything be quoted?
6709 # $quote_arg = should replaced arguments be quoted?
6710 # Returns:
6711 # @target with placeholders replaced
6712 my $self = shift;
6713 my $targetref = shift;
6714 my $quote = shift;
6715 my $quote_arg = shift;
6716 my $context_replace = $self->{'context_replace'};
6717 my @target = @$targetref;
6718 ::debug("replace", "Replace @target\n");
6719 # -X = context replace
6720 # maybe multiple input sources
6721 # maybe --xapply
6722 if(not @target) {
6723 # @target is empty: Return empty array
6724 return @target;
6725 }
6726 # Fish out the words that have replacement strings in them
6727 my %word;
6728 for (@target) {
6729 my $tt = $_;
6730 ::debug("replace", "Target: $tt");
6731 # a{1}b{}c{}d
6732 # a{=1 $_=$_ =}b{= $_=$_ =}c{= $_=$_ =}d
6733 # a\257<1 $_=$_ \257>b\257< $_=$_ \257>c\257< $_=$_ \257>d
6734 # A B C => aAbA B CcA B Cd
6735 # -X A B C => aAbAcAd aAbBcBd aAbCcCd
6736
6737 if($context_replace) {
6738 while($tt =~ s/([^\s\257]* # before {=
6739 (?:
6740 \257< # {=
6741 [^\257]*? # The perl expression
6742 \257> # =}
6743 [^\s\257]* # after =}
6744 )+)/ /x) {
6745 # $1 = pre \257 perlexpr \257 post
6746 $word{"$1"} ||= 1;
6747 }
6748 } else {
6749 while($tt =~ s/( (?: \257<([^\257]*?)\257>) )//x) {
6750 # $f = \257 perlexpr \257
6751 $word{$1} ||= 1;
6752 }
6753 }
6754 }
6755 my @word = keys %word;
6756
6757 my %replace;
6758 my @arg;
6759 for my $record (@{$self->{'arg_list'}}) {
6760 # $self->{'arg_list'} = [ [Arg11, Arg12], [Arg21, Arg22], [Arg31, Arg32] ]
6761 # Merge arg-objects from records into @arg for easy access
6762 CORE::push @arg, @$record;
6763 }
6764 # Add one arg if empty to allow {#} and {%} to be computed only once
6765 if(not @arg) { @arg = (Arg->new("")); }
6766 # Number of arguments - used for positional arguments
6767 my $n = $#_+1;
6768
6769 # This is actually a CommandLine-object,
6770 # but it looks nice to be able to say {= $job->slot() =}
6771 my $job = $self;
6772 for my $word (@word) {
6773 # word = AB \257< perlexpr \257> CD \257< perlexpr \257> EF
6774 my $w = $word;
6775 ::debug("replace", "Replacing in $w\n");
6776
6777 # Replace positional arguments
6778 $w =~ s< ([^\s\257]*) # before {=
6779 \257< # {=
6780 (-?\d+) # Position (eg. -2 or 3)
6781 ([^\257]*?) # The perl expression
6782 \257> # =}
6783 ([^\s\257]*) # after =}
6784 >
6785 { $1. # Context (pre)
6786 (
6787 $arg[$2 > 0 ? $2-1 : $n+$2] ? # If defined: replace
6788 $arg[$2 > 0 ? $2-1 : $n+$2]->replace($3,$quote_arg,$self)
6789 : "")
6790 .$4 }egx;# Context (post)
6791 ::debug("replace", "Positional replaced $word with: $w\n");
6792
6793 if($w !~ /\257/) {
6794 # No more replacement strings in $w: No need to do more
6795 if($quote) {
6796 CORE::push(@{$replace{::shell_quote($word)}}, $w);
6797 } else {
6798 CORE::push(@{$replace{$word}}, $w);
6799 }
6800 next;
6801 }
6802 # for each arg:
6803 # compute replacement for each string
6804 # replace replacement strings with replacement in the word value
6805 # push to replace word value
6806 ::debug("replace", "Positional done: $w\n");
6807 for my $arg (@arg) {
6808 my $val = $w;
6809 my $number_of_replacements = 0;
6810 for my $perlexpr (keys %{$self->{'replacecount'}}) {
6811 # Replace {= perl expr =} with value for each arg
6812 $number_of_replacements +=
6813 $val =~ s{\257<\Q$perlexpr\E\257>}
6814 {$arg ? $arg->replace($perlexpr,$quote_arg,$self) : ""}eg;
6815 }
6816 my $ww = $word;
6817 if($quote) {
6818 $ww = ::shell_quote_scalar($word);
6819 $val = ::shell_quote_scalar($val);
6820 }
6821 if($number_of_replacements) {
6822 CORE::push(@{$replace{$ww}}, $val);
6823 }
6824 }
6825 }
6826
6827 if($quote) {
6828 @target = ::shell_quote(@target);
6829 }
6830 # ::debug("replace", "%replace=",::my_dump(%replace),"\n");
6831 if(%replace) {
6832 # Substitute the replace strings with the replacement values
6833 # Must be sorted by length if a short word is a substring of a long word
6834 my $regexp = join('|', map { my $s = $_; $s =~ s/(\W)/\\$1/g; $s }
6835 sort { length $b <=> length $a } keys %replace);
6836 for(@target) {
6837 s/($regexp)/join(" ",@{$replace{$1}})/ge;
6838 }
6839 }
6840 ::debug("replace", "Return @target\n");
6841 return wantarray ? @target : "@target";
6842 }
6843
6844
6845 package CommandLineQueue;
6846
6847 sub new {
6848 my $class = shift;
6849 my $commandref = shift;
6850 my $read_from = shift;
6851 my $context_replace = shift;
6852 my $max_number_of_args = shift;
6853 my $return_files = shift;
6854 my @unget = ();
6855 my ($count,%replacecount,$posrpl,$perlexpr,%len);
6856 my @command = @$commandref;
6857 # If the first command start with '-' it is probably an option
6858 if($command[0] =~ /^\s*(-\S+)/) {
6859 # Is this really a command in $PATH starting with '-'?
6860 my $cmd = $1;
6861 if(not ::which($cmd)) {
6862 ::error("Command ($cmd) starts with '-'. Is this a wrong option?\n");
6863 ::wait_and_exit(255);
6864 }
6865 }
6866 # Replace replacement strings with {= perl expr =}
6867 # Protect matching inside {= perl expr =}
6868 # by replacing {= and =} with \257< and \257>
6869 for(@command) {
6870 if(/\257/) {
6871 ::error("Command cannot contain the character \257. Use a function for that.\n");
6872 ::wait_and_exit(255);
6873 }
6874 s/\Q$Global::parensleft\E(.*?)\Q$Global::parensright\E/\257<$1\257>/gx;
6875 }
6876 for my $rpl (keys %Global::rpl) {
6877 # Replace the short hand string with the {= perl expr =} in $command and $opt::tagstring
6878 # Avoid replacing inside existing {= perl expr =}
6879 for(@command,@Global::ret_files) {
6880 while(s/((^|\257>)[^\257]*?) # Don't replace after \257 unless \257>
6881 \Q$rpl\E/$1\257<$Global::rpl{$rpl}\257>/xg) {
6882 }
6883 }
6884 if(defined $opt::tagstring) {
6885 for($opt::tagstring) {
6886 while(s/((^|\257>)[^\257]*?) # Don't replace after \257 unless \257>
6887 \Q$rpl\E/$1\257<$Global::rpl{$rpl}\257>/x) {}
6888 }
6889 }
6890 # Do the same for the positional replacement strings
6891 # A bit harder as we have to put in the position number
6892 $posrpl = $rpl;
6893 if($posrpl =~ s/^\{//) {
6894 # Only do this if the shorthand start with {
6895 for(@command,@Global::ret_files) {
6896 s/\{(-?\d+)\Q$posrpl\E/\257<$1 $Global::rpl{$rpl}\257>/g;
6897 }
6898 if(defined $opt::tagstring) {
6899 $opt::tagstring =~ s/\{(-?\d+)\Q$posrpl\E/\257<$1 $perlexpr\257>/g;
6900 }
6901 }
6902 }
6903 my $sum = 0;
6904 while($sum == 0) {
6905 # Count how many times each replacement string is used
6906 my @cmd = @command;
6907 my $contextlen = 0;
6908 my $noncontextlen = 0;
6909 my $contextgroups = 0;
6910 for my $c (@cmd) {
6911 while($c =~ s/ \257<([^\257]*?)\257> /\000/x) {
6912 # %replacecount = { "perlexpr" => number of times seen }
6913 # e.g { "$_++" => 2 }
6914 $replacecount{$1} ++;
6915 $sum++;
6916 }
6917 # Measure the length of the context around the {= perl expr =}
6918 # Use that {=...=} has been replaced with \000 above
6919 # So there is no need to deal with \257<
6920 while($c =~ s/ (\S*\000\S*) //x) {
6921 my $w = $1;
6922 $w =~ tr/\000//d; # Remove all \000's
6923 $contextlen += length($w);
6924 $contextgroups++;
6925 }
6926 # All {= perl expr =} have been removed: The rest is non-context
6927 $noncontextlen += length $c;
6928 }
6929 if($opt::tagstring) {
6930 my $t = $opt::tagstring;
6931 while($t =~ s/ \257<([^\257]*)\257> //x) {
6932 # %replacecount = { "perlexpr" => number of times seen }
6933 # e.g { "$_++" => 2 }
6934 # But for tagstring we just need to mark it as seen
6935 $replacecount{$1}||=1;
6936 }
6937 }
6938
6939 $len{'context'} = 0+$contextlen;
6940 $len{'noncontext'} = $noncontextlen;
6941 $len{'contextgroups'} = $contextgroups;
6942 $len{'noncontextgroups'} = @cmd-$contextgroups;
6943 ::debug("length", "@command Context: ", $len{'context'},
6944 " Non: ", $len{'noncontext'}, " Ctxgrp: ", $len{'contextgroups'},
6945 " NonCtxGrp: ", $len{'noncontextgroups'}, "\n");
6946 if($sum == 0) {
6947 # Default command = {}
6948 # If not replacement string: append {}
6949 if(not @command) {
6950 @command = ("\257<\257>");
6951 $Global::noquote = 1;
6952 } elsif(($opt::pipe or $opt::pipepart)
6953 and not $opt::fifo and not $opt::cat) {
6954 # With --pipe / --pipe-part you can have no replacement
6955 last;
6956 } else {
6957 # Append {} to the command if there are no {...}'s and no {=...=}
6958 push @command, ("\257<\257>");
6959 }
6960 }
6961 }
6962
6963 return bless {
6964 'unget' => \@unget,
6965 'command' => \@command,
6966 'replacecount' => \%replacecount,
6967 'arg_queue' => RecordQueue->new($read_from,$opt::colsep),
6968 'context_replace' => $context_replace,
6969 'len' => \%len,
6970 'max_number_of_args' => $max_number_of_args,
6971 'size' => undef,
6972 'return_files' => $return_files,
6973 'seq' => 1,
6974 }, ref($class) || $class;
6975 }
6976
6977 sub get {
6978 my $self = shift;
6979 if(@{$self->{'unget'}}) {
6980 my $cmd_line = shift @{$self->{'unget'}};
6981 return ($cmd_line);
6982 } else {
6983 my $cmd_line;
6984 $cmd_line = CommandLine->new($self->seq(),
6985 $self->{'command'},
6986 $self->{'arg_queue'},
6987 $self->{'context_replace'},
6988 $self->{'max_number_of_args'},
6989 $self->{'return_files'},
6990 $self->{'replacecount'},
6991 $self->{'len'},
6992 );
6993 $cmd_line->populate();
6994 ::debug("init","cmd_line->number_of_args ",
6995 $cmd_line->number_of_args(), "\n");
6996 if($opt::pipe or $opt::pipepart) {
6997 if($cmd_line->replaced() eq "") {
6998 # Empty command - pipe requires a command
6999 ::error("--pipe must have a command to pipe into (e.g. 'cat').\n");
7000 ::wait_and_exit(255);
7001 }
7002 } else {
7003 if($cmd_line->number_of_args() == 0) {
7004 # We did not get more args - maybe at EOF string?
7005 return undef;
7006 } elsif($cmd_line->replaced() eq "") {
7007 # Empty command - get the next instead
7008 return $self->get();
7009 }
7010 }
7011 $self->set_seq($self->seq()+1);
7012 return $cmd_line;
7013 }
7014 }
7015
7016 sub unget {
7017 my $self = shift;
7018 unshift @{$self->{'unget'}}, @_;
7019 }
7020
7021 sub empty {
7022 my $self = shift;
7023 my $empty = (not @{$self->{'unget'}}) && $self->{'arg_queue'}->empty();
7024 ::debug("run", "CommandLineQueue->empty $empty");
7025 return $empty;
7026 }
7027
7028 sub seq {
7029 my $self = shift;
7030 return $self->{'seq'};
7031 }
7032
7033 sub set_seq {
7034 my $self = shift;
7035 $self->{'seq'} = shift;
7036 }
7037
7038 sub quote_args {
7039 my $self = shift;
7040 # If there is not command emulate |bash
7041 return $self->{'command'};
7042 }
7043
7044 sub size {
7045 my $self = shift;
7046 if(not $self->{'size'}) {
7047 my @all_lines = ();
7048 while(not $self->{'arg_queue'}->empty()) {
7049 push @all_lines, CommandLine->new($self->{'command'},
7050 $self->{'arg_queue'},
7051 $self->{'context_replace'},
7052 $self->{'max_number_of_args'});
7053 }
7054 $self->{'size'} = @all_lines;
7055 $self->unget(@all_lines);
7056 }
7057 return $self->{'size'};
7058 }
7059
7060
7061 package Limits::Command;
7062
7063 # Maximal command line length (for -m and -X)
7064 sub max_length {
7065 # Find the max_length of a command line and cache it
7066 # Returns:
7067 # number of chars on the longest command line allowed
7068 if(not $Limits::Command::line_max_len) {
7069 # Disk cache of max command line length
7070 my $len_cache = $ENV{'HOME'} . "/.parallel/tmp/linelen-" . ::hostname();
7071 my $cached_limit;
7072 if(-e $len_cache) {
7073 open(my $fh, "<", $len_cache) || ::die_bug("Cannot read $len_cache");
7074 $cached_limit = <$fh>;
7075 close $fh;
7076 } else {
7077 $cached_limit = real_max_length();
7078 # If $HOME is write protected: Do not fail
7079 mkdir($ENV{'HOME'} . "/.parallel");
7080 mkdir($ENV{'HOME'} . "/.parallel/tmp");
7081 open(my $fh, ">", $len_cache);
7082 print $fh $cached_limit;
7083 close $fh;
7084 }
7085 $Limits::Command::line_max_len = $cached_limit;
7086 if($opt::max_chars) {
7087 if($opt::max_chars <= $cached_limit) {
7088 $Limits::Command::line_max_len = $opt::max_chars;
7089 } else {
7090 ::warning("Value for -s option ",
7091 "should be < $cached_limit.\n");
7092 }
7093 }
7094 }
7095 return $Limits::Command::line_max_len;
7096 }
7097
7098 sub real_max_length {
7099 # Find the max_length of a command line
7100 # Returns:
7101 # The maximal command line length
7102 # Use an upper bound of 8 MB if the shell allows for for infinite long lengths
7103 my $upper = 8_000_000;
7104 my $len = 8;
7105 do {
7106 if($len > $upper) { return $len };
7107 $len *= 16;
7108 } while (is_acceptable_command_line_length($len));
7109 # Then search for the actual max length between 0 and upper bound
7110 return binary_find_max_length(int($len/16),$len);
7111 }
7112
7113 sub binary_find_max_length {
7114 # Given a lower and upper bound find the max_length of a command line
7115 # Returns:
7116 # number of chars on the longest command line allowed
7117 my ($lower, $upper) = (@_);
7118 if($lower == $upper or $lower == $upper-1) { return $lower; }
7119 my $middle = int (($upper-$lower)/2 + $lower);
7120 ::debug("init", "Maxlen: $lower,$upper,$middle : ");
7121 if (is_acceptable_command_line_length($middle)) {
7122 return binary_find_max_length($middle,$upper);
7123 } else {
7124 return binary_find_max_length($lower,$middle);
7125 }
7126 }
7127
7128 sub is_acceptable_command_line_length {
7129 # Test if a command line of this length can run
7130 # Returns:
7131 # 0 if the command line length is too long
7132 # 1 otherwise
7133 my $len = shift;
7134
7135 local *STDERR;
7136 open (STDERR, ">", "/dev/null");
7137 system "true "."x"x$len;
7138 close STDERR;
7139 ::debug("init", "$len=$? ");
7140 return not $?;
7141 }
7142
7143
7144 package RecordQueue;
7145
7146 sub new {
7147 my $class = shift;
7148 my $fhs = shift;
7149 my $colsep = shift;
7150 my @unget = ();
7151 my $arg_sub_queue;
7152 if($colsep) {
7153 # Open one file with colsep
7154 $arg_sub_queue = RecordColQueue->new($fhs);
7155 } else {
7156 # Open one or more files if multiple -a
7157 $arg_sub_queue = MultifileQueue->new($fhs);
7158 }
7159 return bless {
7160 'unget' => \@unget,
7161 'arg_number' => 0,
7162 'arg_sub_queue' => $arg_sub_queue,
7163 }, ref($class) || $class;
7164 }
7165
7166 sub get {
7167 # Returns:
7168 # reference to array of Arg-objects
7169 my $self = shift;
7170 if(@{$self->{'unget'}}) {
7171 $self->{'arg_number'}++;
7172 return shift @{$self->{'unget'}};
7173 }
7174 my $ret = $self->{'arg_sub_queue'}->get();
7175 if(defined $Global::max_number_of_args
7176 and $Global::max_number_of_args == 0) {
7177 ::debug("run", "Read 1 but return 0 args\n");
7178 return [Arg->new("")];
7179 } else {
7180 return $ret;
7181 }
7182 }
7183
7184 sub unget {
7185 my $self = shift;
7186 ::debug("run", "RecordQueue-unget '@_'\n");
7187 $self->{'arg_number'} -= @_;
7188 unshift @{$self->{'unget'}}, @_;
7189 }
7190
7191 sub empty {
7192 my $self = shift;
7193 my $empty = not @{$self->{'unget'}};
7194 $empty &&= $self->{'arg_sub_queue'}->empty();
7195 ::debug("run", "RecordQueue->empty $empty");
7196 return $empty;
7197 }
7198
7199 sub arg_number {
7200 my $self = shift;
7201 return $self->{'arg_number'};
7202 }
7203
7204
7205 package RecordColQueue;
7206
7207 sub new {
7208 my $class = shift;
7209 my $fhs = shift;
7210 my @unget = ();
7211 my $arg_sub_queue = MultifileQueue->new($fhs);
7212 return bless {
7213 'unget' => \@unget,
7214 'arg_sub_queue' => $arg_sub_queue,
7215 }, ref($class) || $class;
7216 }
7217
7218 sub get {
7219 # Returns:
7220 # reference to array of Arg-objects
7221 my $self = shift;
7222 if(@{$self->{'unget'}}) {
7223 return shift @{$self->{'unget'}};
7224 }
7225 my $unget_ref=$self->{'unget'};
7226 if($self->{'arg_sub_queue'}->empty()) {
7227 return undef;
7228 }
7229 my $in_record = $self->{'arg_sub_queue'}->get();
7230 if(defined $in_record) {
7231 my @out_record = ();
7232 for my $arg (@$in_record) {
7233 ::debug("run", "RecordColQueue::arg $arg\n");
7234 my $line = $arg->orig();
7235 ::debug("run", "line='$line'\n");
7236 if($line ne "") {
7237 for my $s (split /$opt::colsep/o, $line, -1) {
7238 push @out_record, Arg->new($s);
7239 }
7240 } else {
7241 push @out_record, Arg->new("");
7242 }
7243 }
7244 return \@out_record;
7245 } else {
7246 return undef;
7247 }
7248 }
7249
7250 sub unget {
7251 my $self = shift;
7252 ::debug("run", "RecordColQueue-unget '@_'\n");
7253 unshift @{$self->{'unget'}}, @_;
7254 }
7255
7256 sub empty {
7257 my $self = shift;
7258 my $empty = (not @{$self->{'unget'}} and $self->{'arg_sub_queue'}->empty());
7259 ::debug("run", "RecordColQueue->empty $empty");
7260 return $empty;
7261 }
7262
7263
7264 package MultifileQueue;
7265
7266 @Global::unget_argv=();
7267
7268 sub new {
7269 my $class = shift;
7270 my $fhs = shift;
7271 for my $fh (@$fhs) {
7272 if(-t $fh) {
7273 ::warning("Input is read from the terminal. ".
7274 "Only experts do this on purpose. ".
7275 "Press CTRL-D to exit.\n");
7276 }
7277 }
7278 return bless {
7279 'unget' => \@Global::unget_argv,
7280 'fhs' => $fhs,
7281 'arg_matrix' => undef,
7282 }, ref($class) || $class;
7283 }
7284
7285 sub get {
7286 my $self = shift;
7287 if($opt::xapply) {
7288 return $self->xapply_get();
7289 } else {
7290 return $self->nest_get();
7291 }
7292 }
7293
7294 sub unget {
7295 my $self = shift;
7296 ::debug("run", "MultifileQueue-unget '@_'\n");
7297 unshift @{$self->{'unget'}}, @_;
7298 }
7299
7300 sub empty {
7301 my $self = shift;
7302 my $empty = (not @Global::unget_argv
7303 and not @{$self->{'unget'}});
7304 for my $fh (@{$self->{'fhs'}}) {
7305 $empty &&= eof($fh);
7306 }
7307 ::debug("run", "MultifileQueue->empty $empty ");
7308 return $empty;
7309 }
7310
7311 sub xapply_get {
7312 my $self = shift;
7313 if(@{$self->{'unget'}}) {
7314 return shift @{$self->{'unget'}};
7315 }
7316 my @record = ();
7317 my $prepend = undef;
7318 my $empty = 1;
7319 for my $fh (@{$self->{'fhs'}}) {
7320 my $arg = read_arg_from_fh($fh);
7321 if(defined $arg) {
7322 # Record $arg for recycling at end of file
7323 push @{$self->{'arg_matrix'}{$fh}}, $arg;
7324 push @record, $arg;
7325 $empty = 0;
7326 } else {
7327 ::debug("run", "EOA ");
7328 # End of file: Recycle arguments
7329 push @{$self->{'arg_matrix'}{$fh}}, shift @{$self->{'arg_matrix'}{$fh}};
7330 # return last @{$args->{'args'}{$fh}};
7331 push @record, @{$self->{'arg_matrix'}{$fh}}[-1];
7332 }
7333 }
7334 if($empty) {
7335 return undef;
7336 } else {
7337 return \@record;
7338 }
7339 }
7340
7341 sub nest_get {
7342 my $self = shift;
7343 if(@{$self->{'unget'}}) {
7344 return shift @{$self->{'unget'}};
7345 }
7346 my @record = ();
7347 my $prepend = undef;
7348 my $empty = 1;
7349 my $no_of_inputsources = $#{$self->{'fhs'}} + 1;
7350 if(not $self->{'arg_matrix'}) {
7351 # Initialize @arg_matrix with one arg from each file
7352 # read one line from each file
7353 my @first_arg_set;
7354 my $all_empty = 1;
7355 for (my $fhno = 0; $fhno < $no_of_inputsources ; $fhno++) {
7356 my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]);
7357 if(defined $arg) {
7358 $all_empty = 0;
7359 }
7360 $self->{'arg_matrix'}[$fhno][0] = $arg || Arg->new("");
7361 push @first_arg_set, $self->{'arg_matrix'}[$fhno][0];
7362 }
7363 if($all_empty) {
7364 # All filehandles were at eof or eof-string
7365 return undef;
7366 }
7367 return [@first_arg_set];
7368 }
7369
7370 # Treat the case with one input source special. For multiple
7371 # input sources we need to remember all previously read values to
7372 # generate all combinations. But for one input source we can
7373 # forget the value after first use.
7374 if($no_of_inputsources == 1) {
7375 my $arg = read_arg_from_fh($self->{'fhs'}[0]);
7376 if(defined($arg)) {
7377 return [$arg];
7378 }
7379 return undef;
7380 }
7381 for (my $fhno = $no_of_inputsources - 1; $fhno >= 0; $fhno--) {
7382 if(eof($self->{'fhs'}[$fhno])) {
7383 next;
7384 } else {
7385 # read one
7386 my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]);
7387 defined($arg) || next; # If we just read an EOF string: Treat this as EOF
7388 my $len = $#{$self->{'arg_matrix'}[$fhno]} + 1;
7389 $self->{'arg_matrix'}[$fhno][$len] = $arg;
7390 # make all new combinations
7391 my @combarg = ();
7392 for (my $fhn = 0; $fhn < $no_of_inputsources; $fhn++) {
7393 push @combarg, [0, $#{$self->{'arg_matrix'}[$fhn]}];
7394 }
7395 $combarg[$fhno] = [$len,$len]; # Find only combinations with this new entry
7396 # map combinations
7397 # [ 1, 3, 7 ], [ 2, 4, 1 ]
7398 # =>
7399 # [ m[0][1], m[1][3], m[3][7] ], [ m[0][2], m[1][4], m[2][1] ]
7400 my @mapped;
7401 for my $c (expand_combinations(@combarg)) {
7402 my @a;
7403 for my $n (0 .. $no_of_inputsources - 1 ) {
7404 push @a, $self->{'arg_matrix'}[$n][$$c[$n]];
7405 }
7406 push @mapped, \@a;
7407 }
7408 # append the mapped to the ungotten arguments
7409 push @{$self->{'unget'}}, @mapped;
7410 # get the first
7411 return shift @{$self->{'unget'}};
7412 }
7413 }
7414 # all are eof or at EOF string; return from the unget queue
7415 return shift @{$self->{'unget'}};
7416 }
7417
7418 sub read_arg_from_fh {
7419 # Read one Arg from filehandle
7420 # Returns:
7421 # Arg-object with one read line
7422 # undef if end of file
7423 my $fh = shift;
7424 my $prepend = undef;
7425 my $arg;
7426 do {{
7427 # This makes 10% faster
7428 if(not ($arg = <$fh>)) {
7429 if(defined $prepend) {
7430 return Arg->new($prepend);
7431 } else {
7432 return undef;
7433 }
7434 }
7435 # ::debug("run", "read $arg\n");
7436 # Remove delimiter
7437 $arg =~ s:$/$::;
7438 if($Global::end_of_file_string and
7439 $arg eq $Global::end_of_file_string) {
7440 # Ignore the rest of input file
7441 close $fh;
7442 ::debug("run", "EOF-string ($arg) met\n");
7443 if(defined $prepend) {
7444 return Arg->new($prepend);
7445 } else {
7446 return undef;
7447 }
7448 }
7449 if(defined $prepend) {
7450 $arg = $prepend.$arg; # For line continuation
7451 $prepend = undef; #undef;
7452 }
7453 if($Global::ignore_empty) {
7454 if($arg =~ /^\s*$/) {
7455 redo; # Try the next line
7456 }
7457 }
7458 if($Global::max_lines) {
7459 if($arg =~ /\s$/) {
7460 # Trailing space => continued on next line
7461 $prepend = $arg;
7462 redo;
7463 }
7464 }
7465 }} while (1 == 0); # Dummy loop {{}} for redo
7466 if(defined $arg) {
7467 return Arg->new($arg);
7468 } else {
7469 ::die_bug("multiread arg undefined");
7470 }
7471 }
7472
7473 sub expand_combinations {
7474 # Input:
7475 # ([xmin,xmax], [ymin,ymax], ...)
7476 # Returns: ([x,y,...],[x,y,...])
7477 # where xmin <= x <= xmax and ymin <= y <= ymax
7478 my $minmax_ref = shift;
7479 my $xmin = $$minmax_ref[0];
7480 my $xmax = $$minmax_ref[1];
7481 my @p;
7482 if(@_) {
7483 # If there are more columns: Compute those recursively
7484 my @rest = expand_combinations(@_);
7485 for(my $x = $xmin; $x <= $xmax; $x++) {
7486 push @p, map { [$x, @$_] } @rest;
7487 }
7488 } else {
7489 for(my $x = $xmin; $x <= $xmax; $x++) {
7490 push @p, [$x];
7491 }
7492 }
7493 return @p;
7494 }
7495
7496
7497 package Arg;
7498
7499 sub new {
7500 my $class = shift;
7501 my $orig = shift;
7502 my @hostgroups;
7503 if($opt::hostgroups) {
7504 if($orig =~ s:@(.+)::) {
7505 # We found hostgroups on the arg
7506 @hostgroups = split(/\+/, $1);
7507 if(not grep { defined $Global::hostgroups{$_} } @hostgroups) {
7508 ::warning("No such hostgroup (@hostgroups)\n");
7509 @hostgroups = (keys %Global::hostgroups);
7510 }
7511 } else {
7512 @hostgroups = (keys %Global::hostgroups);
7513 }
7514 }
7515 return bless {
7516 'orig' => $orig,
7517 'hostgroups' => \@hostgroups,
7518 }, ref($class) || $class;
7519 }
7520
7521 sub replace {
7522 # Calculates the corresponding value for a given perl expression
7523 # Returns:
7524 # The calculated string (quoted if asked for)
7525 my $self = shift;
7526 my $perlexpr = shift; # E.g. $_=$_ or s/.gz//
7527 my $quote = (shift) ? 1 : 0; # should the string be quoted?
7528 # This is actually a CommandLine-object,
7529 # but it looks nice to be able to say {= $job->slot() =}
7530 my $job = shift;
7531 $perlexpr =~ s/^-?\d+ //; # Positional replace treated as normal replace
7532 if(not defined $self->{"rpl",0,$perlexpr}) {
7533 local $_;
7534 if($Global::trim eq "n") {
7535 $_ = $self->{'orig'};
7536 } else {
7537 $_ = trim_of($self->{'orig'});
7538 }
7539 ::debug("replace", "eval ", $perlexpr, " ", $_, "\n");
7540 if(not $Global::perleval{$perlexpr}) {
7541 # Make an anonymous function of the $perlexpr
7542 # And more importantly: Compile it only once
7543 if($Global::perleval{$perlexpr} =
7544 eval('sub { no strict; no warnings; my $job = shift; '.
7545 $perlexpr.' }')) {
7546 # All is good
7547 } else {
7548 # The eval failed. Maybe $perlexpr is invalid perl?
7549 ::error("Cannot use $perlexpr: $@\n");
7550 ::wait_and_exit(255);
7551 }
7552 }
7553 # Execute the function
7554 $Global::perleval{$perlexpr}->($job);
7555 $self->{"rpl",0,$perlexpr} = $_;
7556 }
7557 if(not defined $self->{"rpl",$quote,$perlexpr}) {
7558 $self->{"rpl",1,$perlexpr} =
7559 ::shell_quote_scalar($self->{"rpl",0,$perlexpr});
7560 }
7561 return $self->{"rpl",$quote,$perlexpr};
7562 }
7563
7564 sub orig {
7565 my $self = shift;
7566 return $self->{'orig'};
7567 }
7568
7569 sub trim_of {
7570 # Removes white space as specifed by --trim:
7571 # n = nothing
7572 # l = start
7573 # r = end
7574 # lr|rl = both
7575 # Returns:
7576 # string with white space removed as needed
7577 my @strings = map { defined $_ ? $_ : "" } (@_);
7578 my $arg;
7579 if($Global::trim eq "n") {
7580 # skip
7581 } elsif($Global::trim eq "l") {
7582 for my $arg (@strings) { $arg =~ s/^\s+//; }
7583 } elsif($Global::trim eq "r") {
7584 for my $arg (@strings) { $arg =~ s/\s+$//; }
7585 } elsif($Global::trim eq "rl" or $Global::trim eq "lr") {
7586 for my $arg (@strings) { $arg =~ s/^\s+//; $arg =~ s/\s+$//; }
7587 } else {
7588 ::error("--trim must be one of: r l rl lr.\n");
7589 ::wait_and_exit(255);
7590 }
7591 return wantarray ? @strings : "@strings";
7592 }
7593
7594
7595 package TimeoutQueue;
7596
7597 sub new {
7598 my $class = shift;
7599 my $delta_time = shift;
7600 my ($pct);
7601 if($delta_time =~ /(\d+(\.\d+)?)%/) {
7602 # Timeout in percent
7603 $pct = $1/100;
7604 $delta_time = 1_000_000;
7605 }
7606 return bless {
7607 'queue' => [],
7608 'delta_time' => $delta_time,
7609 'pct' => $pct,
7610 'remedian_idx' => 0,
7611 'remedian_arr' => [],
7612 'remedian' => undef,
7613 }, ref($class) || $class;
7614 }
7615
7616 sub delta_time {
7617 my $self = shift;
7618 return $self->{'delta_time'};
7619 }
7620
7621 sub set_delta_time {
7622 my $self = shift;
7623 $self->{'delta_time'} = shift;
7624 }
7625
7626 sub remedian {
7627 my $self = shift;
7628 return $self->{'remedian'};
7629 }
7630
7631 sub set_remedian {
7632 # Set median of the last 999^3 (=997002999) values using Remedian
7633 #
7634 # Rousseeuw, Peter J., and Gilbert W. Bassett Jr. "The remedian: A
7635 # robust averaging method for large data sets." Journal of the
7636 # American Statistical Association 85.409 (1990): 97-104.
7637 my $self = shift;
7638 my $val = shift;
7639 my $i = $self->{'remedian_idx'}++;
7640 my $rref = $self->{'remedian_arr'};
7641 $rref->[0][$i%999] = $val;
7642 $rref->[1][$i/999%999] = (sort @{$rref->[0]})[$#{$rref->[0]}/2];
7643 $rref->[2][$i/999/999%999] = (sort @{$rref->[1]})[$#{$rref->[1]}/2];
7644 $self->{'remedian'} = (sort @{$rref->[2]})[$#{$rref->[2]}/2];
7645 }
7646
7647 sub update_delta_time {
7648 # Update delta_time based on runtime of finished job if timeout is
7649 # a percentage
7650 my $self = shift;
7651 my $runtime = shift;
7652 if($self->{'pct'}) {
7653 $self->set_remedian($runtime);
7654 $self->{'delta_time'} = $self->{'pct'} * $self->remedian();
7655 ::debug("run", "Timeout: $self->{'delta_time'}s ");
7656 }
7657 }
7658
7659 sub process_timeouts {
7660 # Check if there was a timeout
7661 my $self = shift;
7662 # $self->{'queue'} is sorted by start time
7663 while (@{$self->{'queue'}}) {
7664 my $job = $self->{'queue'}[0];
7665 if($job->endtime()) {
7666 # Job already finished. No need to timeout the job
7667 # This could be because of --keep-order
7668 shift @{$self->{'queue'}};
7669 } elsif($job->timedout($self->{'delta_time'})) {
7670 # Need to shift off queue before kill
7671 # because kill calls usleep that calls process_timeouts
7672 shift @{$self->{'queue'}};
7673 $job->kill();
7674 } else {
7675 # Because they are sorted by start time the rest are later
7676 last;
7677 }
7678 }
7679 }
7680
7681 sub insert {
7682 my $self = shift;
7683 my $in = shift;
7684 push @{$self->{'queue'}}, $in;
7685 }
7686
7687
7688 package Semaphore;
7689
7690 # This package provides a counting semaphore
7691 #
7692 # If a process dies without releasing the semaphore the next process
7693 # that needs that entry will clean up dead semaphores
7694 #
7695 # The semaphores are stored in ~/.parallel/semaphores/id-<name> Each
7696 # file in ~/.parallel/semaphores/id-<name>/ is the process ID of the
7697 # process holding the entry. If the process dies, the entry can be
7698 # taken by another process.
7699
7700 sub new {
7701 my $class = shift;
7702 my $id = shift;
7703 my $count = shift;
7704 $id=~s/([^-_a-z0-9])/unpack("H*",$1)/ige; # Convert non-word chars to hex
7705 $id="id-".$id; # To distinguish it from a process id
7706 my $parallel_dir = $ENV{'HOME'}."/.parallel";
7707 -d $parallel_dir or mkdir_or_die($parallel_dir);
7708 my $parallel_locks = $parallel_dir."/semaphores";
7709 -d $parallel_locks or mkdir_or_die($parallel_locks);
7710 my $lockdir = "$parallel_locks/$id";
7711 my $lockfile = $lockdir.".lock";
7712 if($count < 1) { ::die_bug("semaphore-count: $count"); }
7713 return bless {
7714 'lockfile' => $lockfile,
7715 'lockfh' => Symbol::gensym(),
7716 'lockdir' => $lockdir,
7717 'id' => $id,
7718 'idfile' => $lockdir."/".$id,
7719 'pid' => $$,
7720 'pidfile' => $lockdir."/".$$.'@'.::hostname(),
7721 'count' => $count + 1 # nlinks returns a link for the 'id-' as well
7722 }, ref($class) || $class;
7723 }
7724
7725 sub acquire {
7726 my $self = shift;
7727 my $sleep = 1; # 1 ms
7728 my $start_time = time;
7729 while(1) {
7730 $self->atomic_link_if_count_less_than() and last;
7731 ::debug("sem", "Remove dead locks");
7732 my $lockdir = $self->{'lockdir'};
7733 for my $d (glob "$lockdir/*") {
7734 ::debug("sem", "Lock $d $lockdir\n");
7735 $d =~ m:$lockdir/([0-9]+)\@([-\._a-z0-9]+)$:o or next;
7736 my ($pid, $host) = ($1, $2);
7737 if($host eq ::hostname()) {
7738 if(not kill 0, $1) {
7739 ::debug("sem", "Dead: $d");
7740 unlink $d;
7741 } else {
7742 ::debug("sem", "Alive: $d");
7743 }
7744 }
7745 }
7746 # try again
7747 $self->atomic_link_if_count_less_than() and last;
7748 # Retry slower and slower up to 1 second
7749 $sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep);
7750 # Random to avoid every sleeping job waking up at the same time
7751 ::usleep(rand()*$sleep);
7752 if(defined($opt::timeout) and
7753 $start_time + $opt::timeout > time) {
7754 # Acquire the lock anyway
7755 if(not -e $self->{'idfile'}) {
7756 open (my $fh, ">", $self->{'idfile'}) or
7757 ::die_bug("timeout_write_idfile: $self->{'idfile'}");
7758 close $fh;
7759 }
7760 link $self->{'idfile'}, $self->{'pidfile'};
7761 last;
7762 }
7763 }
7764 ::debug("sem", "acquired $self->{'pid'}\n");
7765 }
7766
7767 sub release {
7768 my $self = shift;
7769 unlink $self->{'pidfile'};
7770 if($self->nlinks() == 1) {
7771 # This is the last link, so atomic cleanup
7772 $self->lock();
7773 if($self->nlinks() == 1) {
7774 unlink $self->{'idfile'};
7775 rmdir $self->{'lockdir'};
7776 }
7777 $self->unlock();
7778 }
7779 ::debug("run", "released $self->{'pid'}\n");
7780 }
7781
7782 sub _release {
7783 my $self = shift;
7784
7785 unlink $self->{'pidfile'};
7786 $self->lock();
7787 my $nlinks = $self->nlinks();
7788 ::debug("sem", $nlinks, "<", $self->{'count'});
7789 if($nlinks-- > 1) {
7790 unlink $self->{'idfile'};
7791 open (my $fh, ">", $self->{'idfile'}) or
7792 ::die_bug("write_idfile: $self->{'idfile'}");
7793 print $fh "#"x$nlinks;
7794 close $fh;
7795 } else {
7796 unlink $self->{'idfile'};
7797 rmdir $self->{'lockdir'};
7798 }
7799 $self->unlock();
7800 ::debug("sem", "released $self->{'pid'}\n");
7801 }
7802
7803 sub atomic_link_if_count_less_than {
7804 # Link $file1 to $file2 if nlinks to $file1 < $count
7805 my $self = shift;
7806 my $retval = 0;
7807 $self->lock();
7808 ::debug($self->nlinks(), "<", $self->{'count'});
7809 if($self->nlinks() < $self->{'count'}) {
7810 -d $self->{'lockdir'} or mkdir_or_die($self->{'lockdir'});
7811 if(not -e $self->{'idfile'}) {
7812 open (my $fh, ">", $self->{'idfile'}) or
7813 ::die_bug("write_idfile: $self->{'idfile'}");
7814 close $fh;
7815 }
7816 $retval = link $self->{'idfile'}, $self->{'pidfile'};
7817 }
7818 $self->unlock();
7819 ::debug("run", "atomic $retval");
7820 return $retval;
7821 }
7822
7823 sub _atomic_link_if_count_less_than {
7824 # Link $file1 to $file2 if nlinks to $file1 < $count
7825 my $self = shift;
7826 my $retval = 0;
7827 $self->lock();
7828 my $nlinks = $self->nlinks();
7829 ::debug("sem", $nlinks, "<", $self->{'count'});
7830 if($nlinks++ < $self->{'count'}) {
7831 -d $self->{'lockdir'} or mkdir_or_die($self->{'lockdir'});
7832 if(not -e $self->{'idfile'}) {
7833 open (my $fh, ">", $self->{'idfile'}) or
7834 ::die_bug("write_idfile: $self->{'idfile'}");
7835 close $fh;
7836 }
7837 open (my $fh, ">", $self->{'idfile'}) or
7838 ::die_bug("write_idfile: $self->{'idfile'}");
7839 print $fh "#"x$nlinks;
7840 close $fh;
7841 $retval = link $self->{'idfile'}, $self->{'pidfile'};
7842 }
7843 $self->unlock();
7844 ::debug("sem", "atomic $retval");
7845 return $retval;
7846 }
7847
7848 sub nlinks {
7849 my $self = shift;
7850 if(-e $self->{'idfile'}) {
7851 ::debug("sem", "nlinks", (stat(_))[3], "size", (stat(_))[7], "\n");
7852 return (stat(_))[3];
7853 } else {
7854 return 0;
7855 }
7856 }
7857
7858 sub lock {
7859 my $self = shift;
7860 my $sleep = 100; # 100 ms
7861 my $total_sleep = 0;
7862 $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;";
7863 my $locked = 0;
7864 while(not $locked) {
7865 if(tell($self->{'lockfh'}) == -1) {
7866 # File not open
7867 open($self->{'lockfh'}, ">", $self->{'lockfile'})
7868 or ::debug("run", "Cannot open $self->{'lockfile'}");
7869 }
7870 if($self->{'lockfh'}) {
7871 # File is open
7872 chmod 0666, $self->{'lockfile'}; # assuming you want it a+rw
7873 if(flock($self->{'lockfh'}, LOCK_EX()|LOCK_NB())) {
7874 # The file is locked: No need to retry
7875 $locked = 1;
7876 last;
7877 } else {
7878 if ($! =~ m/Function not implemented/) {
7879 ::warning("flock: $!");
7880 ::warning("Will wait for a random while\n");
7881 ::usleep(rand(5000));
7882 # File cannot be locked: No need to retry
7883 $locked = 2;
7884 last;
7885 }
7886 }
7887 }
7888 # Locking failed in first round
7889 # Sleep and try again
7890 $sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep);
7891 # Random to avoid every sleeping job waking up at the same time
7892 ::usleep(rand()*$sleep);
7893 $total_sleep += $sleep;
7894 if($opt::semaphoretimeout) {
7895 if($total_sleep/1000 > $opt::semaphoretimeout) {
7896 # Timeout: bail out
7897 ::warning("Semaphore timed out. Ignoring timeout.");
7898 $locked = 3;
7899 last;
7900 }
7901 } else {
7902 if($total_sleep/1000 > 30) {
7903 ::warning("Semaphore stuck for 30 seconds. Consider using --semaphoretimeout.");
7904 }
7905 }
7906 }
7907 ::debug("run", "locked $self->{'lockfile'}");
7908 }
7909
7910 sub unlock {
7911 my $self = shift;
7912 unlink $self->{'lockfile'};
7913 close $self->{'lockfh'};
7914 ::debug("run", "unlocked\n");
7915 }
7916
7917 sub mkdir_or_die {
7918 # If dir is not writable: die
7919 my $dir = shift;
7920 my @dir_parts = split(m:/:,$dir);
7921 my ($ddir,$part);
7922 while(defined ($part = shift @dir_parts)) {
7923 $part eq "" and next;
7924 $ddir .= "/".$part;
7925 -d $ddir and next;
7926 mkdir $ddir;
7927 }
7928 if(not -w $dir) {
7929 ::error("Cannot write to $dir: $!\n");
7930 ::wait_and_exit(255);
7931 }
7932 }
7933
7934 # Keep perl -w happy
7935 $opt::x = $Semaphore::timeout = $Semaphore::wait =
7936 $Job::file_descriptor_warning_printed = 0;