diff --git a/.github/workflows/qa-tests.yml b/.github/workflows/qa-tests.yml index 6adb077794..0682f03674 100644 --- a/.github/workflows/qa-tests.yml +++ b/.github/workflows/qa-tests.yml @@ -37,7 +37,7 @@ jobs: - name: Pre run: | apt update -y - apt install -y build-essential clang-format parallel libgflags-dev liblz4-dev libsnappy-dev libzstd-dev python3 python3-pip curl + apt install -y build-essential clang-format git parallel libgflags-dev liblz4-dev libsnappy-dev libzstd-dev python3 python3-pip curl - name: Checkout uses: actions/checkout@v3 diff --git a/COPYING b/COPYING deleted file mode 100644 index d159169d10..0000000000 --- a/COPYING +++ /dev/null @@ -1,339 +0,0 @@ - GNU GENERAL PUBLIC LICENSE - Version 2, June 1991 - - Copyright (C) 1989, 1991 Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The licenses for most software are designed to take away your -freedom to share and change it. By contrast, the GNU General Public -License is intended to guarantee your freedom to share and change free -software--to make sure the software is free for all its users. This -General Public License applies to most of the Free Software -Foundation's software and to any other program whose authors commit to -using it. (Some other Free Software Foundation software is covered by -the GNU Lesser General Public License instead.) You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -this service if you wish), that you receive source code or can get it -if you want it, that you can change the software or use pieces of it -in new free programs; and that you know you can do these things. - - To protect your rights, we need to make restrictions that forbid -anyone to deny you these rights or to ask you to surrender the rights. -These restrictions translate to certain responsibilities for you if you -distribute copies of the software, or if you modify it. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must give the recipients all the rights that -you have. You must make sure that they, too, receive or can get the -source code. And you must show them these terms so they know their -rights. - - We protect your rights with two steps: (1) copyright the software, and -(2) offer you this license which gives you legal permission to copy, -distribute and/or modify the software. - - Also, for each author's protection and ours, we want to make certain -that everyone understands that there is no warranty for this free -software. If the software is modified by someone else and passed on, we -want its recipients to know that what they have is not the original, so -that any problems introduced by others will not reflect on the original -authors' reputations. - - Finally, any free program is threatened constantly by software -patents. We wish to avoid the danger that redistributors of a free -program will individually obtain patent licenses, in effect making the -program proprietary. To prevent this, we have made it clear that any -patent must be licensed for everyone's free use or not licensed at all. - - The precise terms and conditions for copying, distribution and -modification follow. - - GNU GENERAL PUBLIC LICENSE - TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - - 0. This License applies to any program or other work which contains -a notice placed by the copyright holder saying it may be distributed -under the terms of this General Public License. The "Program", below, -refers to any such program or work, and a "work based on the Program" -means either the Program or any derivative work under copyright law: -that is to say, a work containing the Program or a portion of it, -either verbatim or with modifications and/or translated into another -language. (Hereinafter, translation is included without limitation in -the term "modification".) Each licensee is addressed as "you". - -Activities other than copying, distribution and modification are not -covered by this License; they are outside its scope. The act of -running the Program is not restricted, and the output from the Program -is covered only if its contents constitute a work based on the -Program (independent of having been made by running the Program). -Whether that is true depends on what the Program does. - - 1. You may copy and distribute verbatim copies of the Program's -source code as you receive it, in any medium, provided that you -conspicuously and appropriately publish on each copy an appropriate -copyright notice and disclaimer of warranty; keep intact all the -notices that refer to this License and to the absence of any warranty; -and give any other recipients of the Program a copy of this License -along with the Program. - -You may charge a fee for the physical act of transferring a copy, and -you may at your option offer warranty protection in exchange for a fee. - - 2. You may modify your copy or copies of the Program or any portion -of it, thus forming a work based on the Program, and copy and -distribute such modifications or work under the terms of Section 1 -above, provided that you also meet all of these conditions: - - a) You must cause the modified files to carry prominent notices - stating that you changed the files and the date of any change. - - b) You must cause any work that you distribute or publish, that in - whole or in part contains or is derived from the Program or any - part thereof, to be licensed as a whole at no charge to all third - parties under the terms of this License. - - c) If the modified program normally reads commands interactively - when run, you must cause it, when started running for such - interactive use in the most ordinary way, to print or display an - announcement including an appropriate copyright notice and a - notice that there is no warranty (or else, saying that you provide - a warranty) and that users may redistribute the program under - these conditions, and telling the user how to view a copy of this - License. (Exception: if the Program itself is interactive but - does not normally print such an announcement, your work based on - the Program is not required to print an announcement.) - -These requirements apply to the modified work as a whole. If -identifiable sections of that work are not derived from the Program, -and can be reasonably considered independent and separate works in -themselves, then this License, and its terms, do not apply to those -sections when you distribute them as separate works. But when you -distribute the same sections as part of a whole which is a work based -on the Program, the distribution of the whole must be on the terms of -this License, whose permissions for other licensees extend to the -entire whole, and thus to each and every part regardless of who wrote it. - -Thus, it is not the intent of this section to claim rights or contest -your rights to work written entirely by you; rather, the intent is to -exercise the right to control the distribution of derivative or -collective works based on the Program. - -In addition, mere aggregation of another work not based on the Program -with the Program (or with a work based on the Program) on a volume of -a storage or distribution medium does not bring the other work under -the scope of this License. - - 3. You may copy and distribute the Program (or a work based on it, -under Section 2) in object code or executable form under the terms of -Sections 1 and 2 above provided that you also do one of the following: - - a) Accompany it with the complete corresponding machine-readable - source code, which must be distributed under the terms of Sections - 1 and 2 above on a medium customarily used for software interchange; or, - - b) Accompany it with a written offer, valid for at least three - years, to give any third party, for a charge no more than your - cost of physically performing source distribution, a complete - machine-readable copy of the corresponding source code, to be - distributed under the terms of Sections 1 and 2 above on a medium - customarily used for software interchange; or, - - c) Accompany it with the information you received as to the offer - to distribute corresponding source code. (This alternative is - allowed only for noncommercial distribution and only if you - received the program in object code or executable form with such - an offer, in accord with Subsection b above.) - -The source code for a work means the preferred form of the work for -making modifications to it. For an executable work, complete source -code means all the source code for all modules it contains, plus any -associated interface definition files, plus the scripts used to -control compilation and installation of the executable. However, as a -special exception, the source code distributed need not include -anything that is normally distributed (in either source or binary -form) with the major components (compiler, kernel, and so on) of the -operating system on which the executable runs, unless that component -itself accompanies the executable. - -If distribution of executable or object code is made by offering -access to copy from a designated place, then offering equivalent -access to copy the source code from the same place counts as -distribution of the source code, even though third parties are not -compelled to copy the source along with the object code. - - 4. You may not copy, modify, sublicense, or distribute the Program -except as expressly provided under this License. Any attempt -otherwise to copy, modify, sublicense or distribute the Program is -void, and will automatically terminate your rights under this License. -However, parties who have received copies, or rights, from you under -this License will not have their licenses terminated so long as such -parties remain in full compliance. - - 5. You are not required to accept this License, since you have not -signed it. However, nothing else grants you permission to modify or -distribute the Program or its derivative works. These actions are -prohibited by law if you do not accept this License. Therefore, by -modifying or distributing the Program (or any work based on the -Program), you indicate your acceptance of this License to do so, and -all its terms and conditions for copying, distributing or modifying -the Program or works based on it. - - 6. Each time you redistribute the Program (or any work based on the -Program), the recipient automatically receives a license from the -original licensor to copy, distribute or modify the Program subject to -these terms and conditions. You may not impose any further -restrictions on the recipients' exercise of the rights granted herein. -You are not responsible for enforcing compliance by third parties to -this License. - - 7. If, as a consequence of a court judgment or allegation of patent -infringement or for any other reason (not limited to patent issues), -conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot -distribute so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you -may not distribute the Program at all. For example, if a patent -license would not permit royalty-free redistribution of the Program by -all those who receive copies directly or indirectly through you, then -the only way you could satisfy both it and this License would be to -refrain entirely from distribution of the Program. - -If any portion of this section is held invalid or unenforceable under -any particular circumstance, the balance of the section is intended to -apply and the section as a whole is intended to apply in other -circumstances. - -It is not the purpose of this section to induce you to infringe any -patents or other property right claims or to contest validity of any -such claims; this section has the sole purpose of protecting the -integrity of the free software distribution system, which is -implemented by public license practices. Many people have made -generous contributions to the wide range of software distributed -through that system in reliance on consistent application of that -system; it is up to the author/donor to decide if he or she is willing -to distribute software through any other system and a licensee cannot -impose that choice. - -This section is intended to make thoroughly clear what is believed to -be a consequence of the rest of this License. - - 8. If the distribution and/or use of the Program is restricted in -certain countries either by patents or by copyrighted interfaces, the -original copyright holder who places the Program under this License -may add an explicit geographical distribution limitation excluding -those countries, so that distribution is permitted only in or among -countries not thus excluded. In such case, this License incorporates -the limitation as if written in the body of this License. - - 9. The Free Software Foundation may publish revised and/or new versions -of the General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - -Each version is given a distinguishing version number. If the Program -specifies a version number of this License which applies to it and "any -later version", you have the option of following the terms and conditions -either of that version or of any later version published by the Free -Software Foundation. If the Program does not specify a version number of -this License, you may choose any version ever published by the Free Software -Foundation. - - 10. If you wish to incorporate parts of the Program into other free -programs whose distribution conditions are different, write to the author -to ask for permission. For software which is copyrighted by the Free -Software Foundation, write to the Free Software Foundation; we sometimes -make exceptions for this. Our decision will be guided by the two goals -of preserving the free status of all derivatives of our free software and -of promoting the sharing and reuse of software generally. - - NO WARRANTY - - 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY -FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN -OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES -PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED -OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS -TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE -PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, -REPAIR OR CORRECTION. - - 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR -REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, -INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING -OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED -TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY -YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER -PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE -POSSIBILITY OF SUCH DAMAGES. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -convey the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. - - - Copyright (C) - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -Also add information on how to contact you by electronic and paper mail. - -If the program is interactive, make it output a short notice like this -when it starts in an interactive mode: - - Gnomovision version 69, Copyright (C) year name of author - Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, the commands you use may -be called something other than `show w' and `show c'; they could even be -mouse-clicks or menu items--whatever suits your program. - -You should also get your employer (if you work as a programmer) or your -school, if any, to sign a "copyright disclaimer" for the program, if -necessary. Here is a sample; alter the names: - - Yoyodyne, Inc., hereby disclaims all copyright interest in the program - `Gnomovision' (which makes passes at compilers) written by James Hacker. - - , 1 April 1989 - Ty Coon, President of Vice - -This General Public License does not permit incorporating your program into -proprietary programs. If your program is a subroutine library, you may -consider it more useful to permit linking proprietary applications with the -library. If this is what you want to do, use the GNU Lesser General -Public License instead of this License. diff --git a/Makefile b/Makefile index 31c1e8d302..7b4aa287c8 100644 --- a/Makefile +++ b/Makefile @@ -947,7 +947,7 @@ else ifeq ($(QUIET_PARALLEL_TESTS), 1) parallel_redir = >& t/$(test_log_prefix)log-{/} else # Default: print failure output only, as it happens -# Note: gnu_parallel --eta is now always used, but has been modified to provide +# Note: parallel --eta is now always used, but has been modified to provide # only infrequent updates when not connected to a terminal. (CircleCI will # kill a job if no output for 10min.) parallel_redir = >& t/$(test_log_prefix)log-{/} || bash -c "cat t/$(test_log_prefix)log-{/}; exit $$?" @@ -966,7 +966,7 @@ check_0: gen_parallel_tests | $(prioritize_long_running_tests) \ | grep -E '$(tests-regexp)' \ | grep -E -v '$(EXCLUDE_TESTS_REGEX)' \ - | build_tools/gnu_parallel -j$(J) --plain --joblog=LOG --eta --gnu '{} $(parallel_redir)' ; \ + | parallel -j$(J) --plain --joblog=LOG --eta --gnu '{} $(parallel_redir)' ; \ parallel_retcode=$$? ; \ awk '{ if ($$7 != 0 || $$8 != 0) { if ($$7 == "Exitval") { h = $$0; } else { if (!f) print h; print; f = 1 } } } END { if(f) exit 1; }' < LOG ; \ awk_retcode=$$?; \ @@ -988,7 +988,7 @@ valgrind_check_0: gen_parallel_tests | $(prioritize_long_running_tests) \ | grep -E '$(tests-regexp)' \ | grep -E -v '$(valgrind-exclude-regexp)' \ - | build_tools/gnu_parallel -j$(J) --plain --joblog=LOG --eta --gnu \ + | parallel -j$(J) --plain --joblog=LOG --eta --gnu \ '(if [[ "{}" == "./"* ]] ; then $(DRIVER) {}; else {}; fi) \ $(parallel_redir)' \ @@ -1010,9 +1010,10 @@ dump-log: # If J != 1 and GNU parallel is installed, run the tests in parallel, # via the check_0 rule above. Otherwise, run them sequentially. check: all - $(AM_V_GEN)if test "$(J)" != 1 \ - && (build_tools/gnu_parallel --gnu --help 2>/dev/null) | \ - grep -q 'GNU Parallel'; \ + $(AM_V_GEN)if [ "$(J)" != "1" ] \ + && command -v parallel 2>&1 >/dev/null \ + && (parallel --gnu --version 2>/dev/null) | \ + grep -q 'Ole Tange'; \ then \ $(MAKE) TMPD=$(TMPD) check_0; \ else \ @@ -1114,9 +1115,10 @@ valgrind_test_some: ROCKSDB_VALGRIND_RUN=1 DISABLE_JEMALLOC=1 $(MAKE) valgrind_check_some valgrind_check: $(TESTS) - $(AM_V_GEN)if test "$(J)" != 1 \ - && (build_tools/gnu_parallel --gnu --help 2>/dev/null) | \ - grep -q 'GNU Parallel'; \ + $(AM_V_GEN)if [ "$(J)" != "1" ] \ + && command -v parallel 2>&1 >/dev/null \ + && (parallel --gnu --version 2>/dev/null) | \ + grep -q 'Ole Tange'; \ then \ $(MAKE) TMPD=$(TMPD) \ DRIVER="$(VALGRIND_VER) $(VALGRIND_OPTS)" valgrind_check_0; \ @@ -1141,14 +1143,22 @@ valgrind_check_some: $(ROCKSDBTESTS_SUBSET) ifneq ($(PAR_TEST),) parloop: + $(AM_V_GEN)if !(case "$(J)" in [!0-9%]*|"") false;; esac && \ + [ "$(J)" -gt 1 ]) \ + || !command -v parallel 2>&1 >/dev/null \ + || !(parallel --gnu --version 2>/dev/null | \ + grep -q 'Ole Tange'); \ + then \ + echo "Need to have GNU Parallel and J > 1"; exit 1; \ + fi; \ ret_bad=0; \ for t in $(PAR_TEST); do \ echo "===== Running $$t in parallel $(NUM_PAR) (`date`)";\ if [ $(db_test) -eq 1 ]; then \ - seq $(J) | v="$$t" build_tools/gnu_parallel --gnu --plain 's=$(TMPD)/rdb-{}; export TEST_TMPDIR=$$s;' \ + seq $(J) | v="$$t" parallel --gnu --plain 's=$(TMPD)/rdb-{}; export TEST_TMPDIR=$$s;' \ 'timeout 2m ./db_test --gtest_filter=$$v >> $$s/log-{} 2>1'; \ else\ - seq $(J) | v="./$$t" build_tools/gnu_parallel --gnu --plain 's=$(TMPD)/rdb-{};' \ + seq $(J) | v="./$$t" parallel --gnu --plain 's=$(TMPD)/rdb-{};' \ 'export TEST_TMPDIR=$$s; timeout 10m $$v >> $$s/log-{} 2>1'; \ fi; \ ret_code=$$?; \ @@ -1165,9 +1175,11 @@ test_names = \ awk '/^[^ ]/ { prefix = $$1 } /^[ ]/ { print prefix $$1 }' parallel_check: $(TESTS) - $(AM_V_GEN)if test "$(J)" > 1 \ - && (build_tools/gnu_parallel --gnu --help 2>/dev/null) | \ - grep -q 'GNU Parallel'; \ + $(AM_V_GEN)if !(case "$(J)" in [!0-9%]*|"") false;; esac && \ + [ "$(J)" -gt 1 ]) \ + && command -v parallel 2>&1 >/dev/null \ + && (parallel --gnu --version 2>/dev/null) | \ + grep -q 'Ole Tange'; \ then \ echo Running in parallel $(J); \ else \ @@ -1176,7 +1188,7 @@ parallel_check: $(TESTS) ret_bad=0; \ echo $(J);\ echo Test Dir: $(TMPD); \ - seq $(J) | build_tools/gnu_parallel --gnu --plain 's=$(TMPD)/rdb-{}; rm -rf $$s; mkdir $$s'; \ + seq $(J) | parallel --gnu --plain 's=$(TMPD)/rdb-{}; rm -rf $$s; mkdir $$s'; \ $(MAKE) PAR_TEST="$(shell $(test_names))" TMPD=$(TMPD) \ J=$(J) db_test=1 parloop; \ $(MAKE) PAR_TEST="$(filter-out db_test, $(TESTS))" \ diff --git a/build_tools/gnu_parallel b/build_tools/gnu_parallel deleted file mode 100755 index 757b25f11d..0000000000 --- a/build_tools/gnu_parallel +++ /dev/null @@ -1,7970 +0,0 @@ -#!/usr/bin/env perl - -# Copyright (C) 2007,2008,2009,2010,2011,2012,2013,2014 Ole Tange and -# Free Software Foundation, Inc. -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, see -# or write to the Free Software Foundation, Inc., 51 Franklin St, -# Fifth Floor, Boston, MA 02110-1301 USA - -# open3 used in Job::start -use IPC::Open3; -# &WNOHANG used in reaper -use POSIX qw(:sys_wait_h setsid ceil :errno_h); -# gensym used in Job::start -use Symbol qw(gensym); -# tempfile used in Job::start -use File::Temp qw(tempfile tempdir); -# mkpath used in openresultsfile -use File::Path; -# GetOptions used in get_options_from_array -use Getopt::Long; -# Used to ensure code quality -use strict; -use File::Basename; - -if(not $ENV{HOME}) { - # $ENV{HOME} is sometimes not set if called from PHP - ::warning("\$HOME not set. Using /tmp\n"); - $ENV{HOME} = "/tmp"; -} - -save_stdin_stdout_stderr(); -save_original_signal_handler(); -parse_options(); -::debug("init", "Open file descriptors: ", join(" ",keys %Global::fd), "\n"); -my $number_of_args; -if($Global::max_number_of_args) { - $number_of_args=$Global::max_number_of_args; -} elsif ($opt::X or $opt::m or $opt::xargs) { - $number_of_args = undef; -} else { - $number_of_args = 1; -} - -my @command; -@command = @ARGV; - -my @fhlist; -if($opt::pipepart) { - @fhlist = map { open_or_exit($_) } "/dev/null"; -} else { - @fhlist = map { open_or_exit($_) } @opt::a; - if(not @fhlist and not $opt::pipe) { - @fhlist = (*STDIN); - } -} - -if($opt::skip_first_line) { - # Skip the first line for the first file handle - my $fh = $fhlist[0]; - <$fh>; -} -if($opt::header and not $opt::pipe) { - my $fh = $fhlist[0]; - # split with colsep or \t - # $header force $colsep = \t if undef? - my $delimiter = $opt::colsep; - $delimiter ||= "\$"; - my $id = 1; - for my $fh (@fhlist) { - my $line = <$fh>; - chomp($line); - ::debug("init", "Delimiter: '$delimiter'"); - for my $s (split /$delimiter/o, $line) { - ::debug("init", "Colname: '$s'"); - # Replace {colname} with {2} - # TODO accept configurable short hands - # TODO how to deal with headers in {=...=} - for(@command) { - s:\{$s(|/|//|\.|/\.)\}:\{$id$1\}:g; - } - $Global::input_source_header{$id} = $s; - $id++; - } - } -} else { - my $id = 1; - for my $fh (@fhlist) { - $Global::input_source_header{$id} = $id; - $id++; - } -} - -if($opt::filter_hosts and (@opt::sshlogin or @opt::sshloginfile)) { - # Parallel check all hosts are up. Remove hosts that are down - filter_hosts(); -} - -if($opt::nonall or $opt::onall) { - onall(@command); - wait_and_exit(min(undef_as_zero($Global::exitstatus),254)); -} - -# TODO --transfer foo/./bar --cleanup -# multiple --transfer and --basefile with different /./ - -$Global::JobQueue = JobQueue->new( - \@command,\@fhlist,$Global::ContextReplace,$number_of_args,\@Global::ret_files); - -if($opt::eta or $opt::bar) { - # Count the number of jobs before starting any - $Global::JobQueue->total_jobs(); -} -if($opt::pipepart) { - @Global::cat_partials = map { pipe_part_files($_) } @opt::a; - # Unget the command as many times as there are parts - $Global::JobQueue->{'commandlinequeue'}->unget( - map { $Global::JobQueue->{'commandlinequeue'}->get() } @Global::cat_partials - ); -} -for my $sshlogin (values %Global::host) { - $sshlogin->max_jobs_running(); -} - -init_run_jobs(); -my $sem; -if($Global::semaphore) { - $sem = acquire_semaphore(); -} -$SIG{TERM} = \&start_no_new_jobs; - -start_more_jobs(); -if(not $opt::pipepart) { - if($opt::pipe) { - spreadstdin(); - } -} -::debug("init", "Start draining\n"); -drain_job_queue(); -::debug("init", "Done draining\n"); -reaper(); -::debug("init", "Done reaping\n"); -if($opt::pipe and @opt::a) { - for my $job (@Global::tee_jobs) { - unlink $job->fh(2,"name"); - $job->set_fh(2,"name",""); - $job->print(); - unlink $job->fh(1,"name"); - } -} -::debug("init", "Cleaning\n"); -cleanup(); -if($Global::semaphore) { - $sem->release(); -} -for(keys %Global::sshmaster) { - kill "TERM", $_; -} -::debug("init", "Halt\n"); -if($opt::halt_on_error) { - wait_and_exit($Global::halt_on_error_exitstatus); -} else { - wait_and_exit(min(undef_as_zero($Global::exitstatus),254)); -} - -sub __PIPE_MODE__ {} - -sub pipe_part_files { - # Input: - # $file = the file to read - # Returns: - # @commands that will cat_partial each part - my ($file) = @_; - my $buf = ""; - my $header = find_header(\$buf,open_or_exit($file)); - # find positions - my @pos = find_split_positions($file,$opt::blocksize,length $header); - # Make @cat_partials - my @cat_partials = (); - for(my $i=0; $i<$#pos; $i++) { - push @cat_partials, cat_partial($file, 0, length($header), $pos[$i], $pos[$i+1]); - } - # Remote exec should look like: - # ssh -oLogLevel=quiet lo 'eval `echo $SHELL | grep "/t\{0,1\}csh" > /dev/null && echo setenv PARALLEL_SEQ '$PARALLEL_SEQ'\; setenv PARALLEL_PID '$PARALLEL_PID' || echo PARALLEL_SEQ='$PARALLEL_SEQ'\;export PARALLEL_SEQ\; PARALLEL_PID='$PARALLEL_PID'\;export PARALLEL_PID` ;' tty\ \>/dev/null\ \&\&\ stty\ isig\ -onlcr\ -echo\;echo\ \$SHELL\ \|\ grep\ \"/t\\\{0,1\\\}csh\"\ \>\ /dev/null\ \&\&\ setenv\ FOO\ /tmp/foo\ \|\|\ export\ FOO=/tmp/foo\; \(wc\ -\ \$FOO\) - # ssh -tt not allowed. Remote will die due to broken pipe anyway. - # TODO test remote with --fifo / --cat - return @cat_partials; -} - -sub find_header { - # Input: - # $buf_ref = reference to read-in buffer - # $fh = filehandle to read from - # Uses: - # $opt::header - # $opt::blocksize - # Returns: - # $header string - my ($buf_ref, $fh) = @_; - my $header = ""; - if($opt::header) { - if($opt::header eq ":") { $opt::header = "(.*\n)"; } - # Number = number of lines - $opt::header =~ s/^(\d+)$/"(.*\n)"x$1/e; - while(read($fh,substr($$buf_ref,length $$buf_ref,0),$opt::blocksize)) { - if($$buf_ref=~s/^($opt::header)//) { - $header = $1; - last; - } - } - } - return $header; -} - -sub find_split_positions { - # Input: - # $file = the file to read - # $block = (minimal) --block-size of each chunk - # $headerlen = length of header to be skipped - # Uses: - # $opt::recstart - # $opt::recend - # Returns: - # @positions of block start/end - my($file, $block, $headerlen) = @_; - my $size = -s $file; - $block = int $block; - # The optimal dd blocksize for mint, redhat, solaris, openbsd = 2^17..2^20 - # The optimal dd blocksize for freebsd = 2^15..2^17 - my $dd_block_size = 131072; # 2^17 - my @pos; - my ($recstart,$recend) = recstartrecend(); - my $recendrecstart = $recend.$recstart; - my $fh = ::open_or_exit($file); - push(@pos,$headerlen); - for(my $pos = $block+$headerlen; $pos < $size; $pos += $block) { - my $buf; - seek($fh, $pos, 0) || die; - while(read($fh,substr($buf,length $buf,0),$dd_block_size)) { - if($opt::regexp) { - # If match /$recend$recstart/ => Record position - if($buf =~ /(.*$recend)$recstart/os) { - my $i = length($1); - push(@pos,$pos+$i); - # Start looking for next record _after_ this match - $pos += $i; - last; - } - } else { - # If match $recend$recstart => Record position - my $i = index($buf,$recendrecstart); - if($i != -1) { - push(@pos,$pos+$i); - # Start looking for next record _after_ this match - $pos += $i; - last; - } - } - } - } - push(@pos,$size); - close $fh; - return @pos; -} - -sub cat_partial { - # Input: - # $file = the file to read - # ($start, $end, [$start2, $end2, ...]) = start byte, end byte - # Returns: - # Efficient perl command to copy $start..$end, $start2..$end2, ... to stdout - my($file, @start_end) = @_; - my($start, $i); - # Convert start_end to start_len - my @start_len = map { if(++$i % 2) { $start = $_; } else { $_-$start } } @start_end; - return "<". shell_quote_scalar($file) . - q{ perl -e 'while(@ARGV) { sysseek(STDIN,shift,0) || die; $left = shift; while($read = sysread(STDIN,$buf, ($left > 32768 ? 32768 : $left))){ $left -= $read; syswrite(STDOUT,$buf); } }' } . - " @start_len"; -} - -sub spreadstdin { - # read a record - # Spawn a job and print the record to it. - # Uses: - # $opt::blocksize - # STDIN - # $opr::r - # $Global::max_lines - # $Global::max_number_of_args - # $opt::regexp - # $Global::start_no_new_jobs - # $opt::roundrobin - # %Global::running - - my $buf = ""; - my ($recstart,$recend) = recstartrecend(); - my $recendrecstart = $recend.$recstart; - my $chunk_number = 1; - my $one_time_through; - my $blocksize = $opt::blocksize; - my $in = *STDIN; - my $header = find_header(\$buf,$in); - while(1) { - my $anything_written = 0; - if(not read($in,substr($buf,length $buf,0),$blocksize)) { - # End-of-file - $chunk_number != 1 and last; - # Force the while-loop once if everything was read by header reading - $one_time_through++ and last; - } - if($opt::r) { - # Remove empty lines - $buf =~ s/^\s*\n//gm; - if(length $buf == 0) { - next; - } - } - if($Global::max_lines and not $Global::max_number_of_args) { - # Read n-line records - my $n_lines = $buf =~ tr/\n/\n/; - my $last_newline_pos = rindex($buf,"\n"); - while($n_lines % $Global::max_lines) { - $n_lines--; - $last_newline_pos = rindex($buf,"\n",$last_newline_pos-1); - } - # Chop at $last_newline_pos as that is where n-line record ends - $anything_written += - write_record_to_pipe($chunk_number++,\$header,\$buf, - $recstart,$recend,$last_newline_pos+1); - substr($buf,0,$last_newline_pos+1) = ""; - } elsif($opt::regexp) { - if($Global::max_number_of_args) { - # -N => (start..*?end){n} - # -L -N => (start..*?end){n*l} - my $read_n_lines = $Global::max_number_of_args * ($Global::max_lines || 1); - while($buf =~ s/((?:$recstart.*?$recend){$read_n_lines})($recstart.*)$/$2/os) { - # Copy to modifiable variable - my $b = $1; - $anything_written += - write_record_to_pipe($chunk_number++,\$header,\$b, - $recstart,$recend,length $1); - } - } else { - # Find the last recend-recstart in $buf - if($buf =~ s/(.*$recend)($recstart.*?)$/$2/os) { - # Copy to modifiable variable - my $b = $1; - $anything_written += - write_record_to_pipe($chunk_number++,\$header,\$b, - $recstart,$recend,length $1); - } - } - } else { - if($Global::max_number_of_args) { - # -N => (start..*?end){n} - my $i = 0; - my $read_n_lines = $Global::max_number_of_args * ($Global::max_lines || 1); - while(($i = nindex(\$buf,$recendrecstart,$read_n_lines)) != -1) { - $i += length $recend; # find the actual splitting location - $anything_written += - write_record_to_pipe($chunk_number++,\$header,\$buf, - $recstart,$recend,$i); - substr($buf,0,$i) = ""; - } - } else { - # Find the last recend-recstart in $buf - my $i = rindex($buf,$recendrecstart); - if($i != -1) { - $i += length $recend; # find the actual splitting location - $anything_written += - write_record_to_pipe($chunk_number++,\$header,\$buf, - $recstart,$recend,$i); - substr($buf,0,$i) = ""; - } - } - } - if(not $anything_written and not eof($in)) { - # Nothing was written - maybe the block size < record size? - # Increase blocksize exponentially - my $old_blocksize = $blocksize; - $blocksize = ceil($blocksize * 1.3 + 1); - ::warning("A record was longer than $old_blocksize. " . - "Increasing to --blocksize $blocksize\n"); - } - } - ::debug("init", "Done reading input\n"); - - # If there is anything left in the buffer write it - substr($buf,0,0) = ""; - write_record_to_pipe($chunk_number++,\$header,\$buf,$recstart,$recend,length $buf); - - $Global::start_no_new_jobs ||= 1; - if($opt::roundrobin) { - for my $job (values %Global::running) { - close $job->fh(0,"w"); - } - my %incomplete_jobs = %Global::running; - my $sleep = 1; - while(keys %incomplete_jobs) { - my $something_written = 0; - for my $pid (keys %incomplete_jobs) { - my $job = $incomplete_jobs{$pid}; - if($job->stdin_buffer_length()) { - $something_written += $job->non_block_write(); - } else { - delete $incomplete_jobs{$pid} - } - } - if($something_written) { - $sleep = $sleep/2+0.001; - } - $sleep = ::reap_usleep($sleep); - } - } -} - -sub recstartrecend { - # Uses: - # $opt::recstart - # $opt::recend - # Returns: - # $recstart,$recend with default values and regexp conversion - my($recstart,$recend); - if(defined($opt::recstart) and defined($opt::recend)) { - # If both --recstart and --recend is given then both must match - $recstart = $opt::recstart; - $recend = $opt::recend; - } elsif(defined($opt::recstart)) { - # If --recstart is given it must match start of record - $recstart = $opt::recstart; - $recend = ""; - } elsif(defined($opt::recend)) { - # If --recend is given then it must match end of record - $recstart = ""; - $recend = $opt::recend; - } - - if($opt::regexp) { - # If $recstart/$recend contains '|' this should only apply to the regexp - $recstart = "(?:".$recstart.")"; - $recend = "(?:".$recend.")"; - } else { - # $recstart/$recend = printf strings (\n) - $recstart =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee; - $recend =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee; - } - return ($recstart,$recend); -} - -sub nindex { - # See if string is in buffer N times - # Returns: - # the position where the Nth copy is found - my ($buf_ref, $str, $n) = @_; - my $i = 0; - for(1..$n) { - $i = index($$buf_ref,$str,$i+1); - if($i == -1) { last } - } - return $i; -} - -{ - my @robin_queue; - - sub round_robin_write { - # Input: - # $header_ref = ref to $header string - # $block_ref = ref to $block to be written - # $recstart = record start string - # $recend = record end string - # $endpos = end position of $block - # Uses: - # %Global::running - my ($header_ref,$block_ref,$recstart,$recend,$endpos) = @_; - my $something_written = 0; - my $block_passed = 0; - my $sleep = 1; - while(not $block_passed) { - # Continue flushing existing buffers - # until one is empty and a new block is passed - # Make a queue to spread the blocks evenly - if(not @robin_queue) { - push @robin_queue, values %Global::running; - } - while(my $job = shift @robin_queue) { - if($job->stdin_buffer_length() > 0) { - $something_written += $job->non_block_write(); - } else { - $job->set_stdin_buffer($header_ref,$block_ref,$endpos,$recstart,$recend); - $block_passed = 1; - $job->set_virgin(0); - $something_written += $job->non_block_write(); - last; - } - } - $sleep = ::reap_usleep($sleep); - } - return $something_written; - } -} - -sub write_record_to_pipe { - # Fork then - # Write record from pos 0 .. $endpos to pipe - # Input: - # $chunk_number = sequence number - to see if already run - # $header_ref = reference to header string to prepend - # $record_ref = reference to record to write - # $recstart = start string of record - # $recend = end string of record - # $endpos = position in $record_ref where record ends - # Uses: - # $Global::job_already_run - # $opt::roundrobin - # @Global::virgin_jobs - # Returns: - # Number of chunks written (0 or 1) - my ($chunk_number,$header_ref,$record_ref,$recstart,$recend,$endpos) = @_; - if($endpos == 0) { return 0; } - if(vec($Global::job_already_run,$chunk_number,1)) { return 1; } - if($opt::roundrobin) { - return round_robin_write($header_ref,$record_ref,$recstart,$recend,$endpos); - } - # If no virgin found, backoff - my $sleep = 0.0001; # 0.01 ms - better performance on highend - while(not @Global::virgin_jobs) { - ::debug("pipe", "No virgin jobs"); - $sleep = ::reap_usleep($sleep); - # Jobs may not be started because of loadavg - # or too little time between each ssh login. - start_more_jobs(); - } - my $job = shift @Global::virgin_jobs; - # Job is no longer virgin - $job->set_virgin(0); - if(fork()) { - # Skip - } else { - # Chop of at $endpos as we do not know how many rec_sep will - # be removed. - substr($$record_ref,$endpos,length $$record_ref) = ""; - # Remove rec_sep - if($opt::remove_rec_sep) { - Job::remove_rec_sep($record_ref,$recstart,$recend); - } - $job->write($header_ref); - $job->write($record_ref); - close $job->fh(0,"w"); - exit(0); - } - close $job->fh(0,"w"); - return 1; -} - -sub __SEM_MODE__ {} - -sub acquire_semaphore { - # Acquires semaphore. If needed: spawns to the background - # Uses: - # @Global::host - # Returns: - # The semaphore to be released when jobs is complete - $Global::host{':'} = SSHLogin->new(":"); - my $sem = Semaphore->new($Semaphore::name,$Global::host{':'}->max_jobs_running()); - $sem->acquire(); - if($Semaphore::fg) { - # skip - } else { - # If run in the background, the PID will change - # therefore release and re-acquire the semaphore - $sem->release(); - if(fork()) { - exit(0); - } else { - # child - # Get a semaphore for this pid - ::die_bug("Can't start a new session: $!") if setsid() == -1; - $sem = Semaphore->new($Semaphore::name,$Global::host{':'}->max_jobs_running()); - $sem->acquire(); - } - } - return $sem; -} - -sub __PARSE_OPTIONS__ {} - -sub options_hash { - # Returns: - # %hash = the GetOptions config - return - ("debug|D=s" => \$opt::D, - "xargs" => \$opt::xargs, - "m" => \$opt::m, - "X" => \$opt::X, - "v" => \@opt::v, - "joblog=s" => \$opt::joblog, - "results|result|res=s" => \$opt::results, - "resume" => \$opt::resume, - "resume-failed|resumefailed" => \$opt::resume_failed, - "silent" => \$opt::silent, - #"silent-error|silenterror" => \$opt::silent_error, - "keep-order|keeporder|k" => \$opt::keeporder, - "group" => \$opt::group, - "g" => \$opt::retired, - "ungroup|u" => \$opt::ungroup, - "linebuffer|linebuffered|line-buffer|line-buffered" => \$opt::linebuffer, - "tmux" => \$opt::tmux, - "null|0" => \$opt::0, - "quote|q" => \$opt::q, - # Replacement strings - "parens=s" => \$opt::parens, - "rpl=s" => \@opt::rpl, - "plus" => \$opt::plus, - "I=s" => \$opt::I, - "extensionreplace|er=s" => \$opt::U, - "U=s" => \$opt::retired, - "basenamereplace|bnr=s" => \$opt::basenamereplace, - "dirnamereplace|dnr=s" => \$opt::dirnamereplace, - "basenameextensionreplace|bner=s" => \$opt::basenameextensionreplace, - "seqreplace=s" => \$opt::seqreplace, - "slotreplace=s" => \$opt::slotreplace, - "jobs|j=s" => \$opt::jobs, - "delay=f" => \$opt::delay, - "sshdelay=f" => \$opt::sshdelay, - "load=s" => \$opt::load, - "noswap" => \$opt::noswap, - "max-line-length-allowed" => \$opt::max_line_length_allowed, - "number-of-cpus" => \$opt::number_of_cpus, - "number-of-cores" => \$opt::number_of_cores, - "use-cpus-instead-of-cores" => \$opt::use_cpus_instead_of_cores, - "shellquote|shell_quote|shell-quote" => \$opt::shellquote, - "nice=i" => \$opt::nice, - "timeout=s" => \$opt::timeout, - "tag" => \$opt::tag, - "tagstring|tag-string=s" => \$opt::tagstring, - "onall" => \$opt::onall, - "nonall" => \$opt::nonall, - "filter-hosts|filterhosts|filter-host" => \$opt::filter_hosts, - "sshlogin|S=s" => \@opt::sshlogin, - "sshloginfile|slf=s" => \@opt::sshloginfile, - "controlmaster|M" => \$opt::controlmaster, - "return=s" => \@opt::return, - "trc=s" => \@opt::trc, - "transfer" => \$opt::transfer, - "cleanup" => \$opt::cleanup, - "basefile|bf=s" => \@opt::basefile, - "B=s" => \$opt::retired, - "ctrlc|ctrl-c" => \$opt::ctrlc, - "noctrlc|no-ctrlc|no-ctrl-c" => \$opt::noctrlc, - "workdir|work-dir|wd=s" => \$opt::workdir, - "W=s" => \$opt::retired, - "tmpdir=s" => \$opt::tmpdir, - "tempdir=s" => \$opt::tmpdir, - "use-compress-program|compress-program=s" => \$opt::compress_program, - "use-decompress-program|decompress-program=s" => \$opt::decompress_program, - "compress" => \$opt::compress, - "tty" => \$opt::tty, - "T" => \$opt::retired, - "halt-on-error|halt=s" => \$opt::halt_on_error, - "H=i" => \$opt::retired, - "retries=i" => \$opt::retries, - "dry-run|dryrun" => \$opt::dryrun, - "progress" => \$opt::progress, - "eta" => \$opt::eta, - "bar" => \$opt::bar, - "arg-sep|argsep=s" => \$opt::arg_sep, - "arg-file-sep|argfilesep=s" => \$opt::arg_file_sep, - "trim=s" => \$opt::trim, - "env=s" => \@opt::env, - "recordenv|record-env" => \$opt::record_env, - "plain" => \$opt::plain, - "profile|J=s" => \@opt::profile, - "pipe|spreadstdin" => \$opt::pipe, - "robin|round-robin|roundrobin" => \$opt::roundrobin, - "recstart=s" => \$opt::recstart, - "recend=s" => \$opt::recend, - "regexp|regex" => \$opt::regexp, - "remove-rec-sep|removerecsep|rrs" => \$opt::remove_rec_sep, - "files|output-as-files|outputasfiles" => \$opt::files, - "block|block-size|blocksize=s" => \$opt::blocksize, - "tollef" => \$opt::retired, - "gnu" => \$opt::gnu, - "xapply" => \$opt::xapply, - "bibtex" => \$opt::bibtex, - "nn|nonotice|no-notice" => \$opt::no_notice, - # xargs-compatibility - implemented, man, testsuite - "max-procs|P=s" => \$opt::jobs, - "delimiter|d=s" => \$opt::d, - "max-chars|s=i" => \$opt::max_chars, - "arg-file|a=s" => \@opt::a, - "no-run-if-empty|r" => \$opt::r, - "replace|i:s" => \$opt::i, - "E=s" => \$opt::eof, - "eof|e:s" => \$opt::eof, - "max-args|n=i" => \$opt::max_args, - "max-replace-args|N=i" => \$opt::max_replace_args, - "colsep|col-sep|C=s" => \$opt::colsep, - "help|h" => \$opt::help, - "L=f" => \$opt::L, - "max-lines|l:f" => \$opt::max_lines, - "interactive|p" => \$opt::p, - "verbose|t" => \$opt::verbose, - "version|V" => \$opt::version, - "minversion|min-version=i" => \$opt::minversion, - "show-limits|showlimits" => \$opt::show_limits, - "exit|x" => \$opt::x, - # Semaphore - "semaphore" => \$opt::semaphore, - "semaphoretimeout=i" => \$opt::semaphoretimeout, - "semaphorename|id=s" => \$opt::semaphorename, - "fg" => \$opt::fg, - "bg" => \$opt::bg, - "wait" => \$opt::wait, - # Shebang #!/usr/bin/parallel --shebang - "shebang|hashbang" => \$opt::shebang, - "internal-pipe-means-argfiles" => \$opt::internal_pipe_means_argfiles, - "Y" => \$opt::retired, - "skip-first-line" => \$opt::skip_first_line, - "header=s" => \$opt::header, - "cat" => \$opt::cat, - "fifo" => \$opt::fifo, - "pipepart|pipe-part" => \$opt::pipepart, - "hgrp|hostgroup|hostgroups" => \$opt::hostgroups, - ); -} - -sub get_options_from_array { - # Run GetOptions on @array - # Input: - # $array_ref = ref to @ARGV to parse - # @keep_only = Keep only these options - # Uses: - # @ARGV - # Returns: - # true if parsing worked - # false if parsing failed - # @$array_ref is changed - my ($array_ref, @keep_only) = @_; - if(not @$array_ref) { - # Empty array: No need to look more at that - return 1; - } - # A bit of shuffling of @ARGV needed as GetOptionsFromArray is not - # supported everywhere - my @save_argv; - my $this_is_ARGV = (\@::ARGV == $array_ref); - if(not $this_is_ARGV) { - @save_argv = @::ARGV; - @::ARGV = @{$array_ref}; - } - # If @keep_only set: Ignore all values except @keep_only - my %options = options_hash(); - if(@keep_only) { - my (%keep,@dummy); - @keep{@keep_only} = @keep_only; - for my $k (grep { not $keep{$_} } keys %options) { - # Store the value of the option in @dummy - $options{$k} = \@dummy; - } - } - my $retval = GetOptions(%options); - if(not $this_is_ARGV) { - @{$array_ref} = @::ARGV; - @::ARGV = @save_argv; - } - return $retval; -} - -sub parse_options { - # Returns: N/A - # Defaults: - $Global::version = 20141122; - $Global::progname = 'parallel'; - $Global::infinity = 2**31; - $Global::debug = 0; - $Global::verbose = 0; - $Global::quoting = 0; - # Read only table with default --rpl values - %Global::replace = - ( - '{}' => '', - '{#}' => '1 $_=$job->seq()', - '{%}' => '1 $_=$job->slot()', - '{/}' => 's:.*/::', - '{//}' => '$Global::use{"File::Basename"} ||= eval "use File::Basename; 1;"; $_ = dirname($_);', - '{/.}' => 's:.*/::; s:\.[^/.]+$::;', - '{.}' => 's:\.[^/.]+$::', - ); - %Global::plus = - ( - # {} = {+/}/{/} - # = {.}.{+.} = {+/}/{/.}.{+.} - # = {..}.{+..} = {+/}/{/..}.{+..} - # = {...}.{+...} = {+/}/{/...}.{+...} - '{+/}' => 's:/[^/]*$::', - '{+.}' => 's:.*\.::', - '{+..}' => 's:.*\.([^.]*\.):$1:', - '{+...}' => 's:.*\.([^.]*\.[^.]*\.):$1:', - '{..}' => 's:\.[^/.]+$::; s:\.[^/.]+$::', - '{...}' => 's:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::', - '{/..}' => 's:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::', - '{/...}' => 's:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::', - ); - # Modifiable copy of %Global::replace - %Global::rpl = %Global::replace; - $Global::parens = "{==}"; - $/="\n"; - $Global::ignore_empty = 0; - $Global::interactive = 0; - $Global::stderr_verbose = 0; - $Global::default_simultaneous_sshlogins = 9; - $Global::exitstatus = 0; - $Global::halt_on_error_exitstatus = 0; - $Global::arg_sep = ":::"; - $Global::arg_file_sep = "::::"; - $Global::trim = 'n'; - $Global::max_jobs_running = 0; - $Global::job_already_run = ''; - $ENV{'TMPDIR'} ||= "/tmp"; - - @ARGV=read_options(); - - if(@opt::v) { $Global::verbose = $#opt::v+1; } # Convert -v -v to v=2 - $Global::debug = $opt::D; - $Global::shell = $ENV{'PARALLEL_SHELL'} || parent_shell($$) || $ENV{'SHELL'} || "/bin/sh"; - if(defined $opt::X) { $Global::ContextReplace = 1; } - if(defined $opt::silent) { $Global::verbose = 0; } - if(defined $opt::0) { $/ = "\0"; } - if(defined $opt::d) { my $e="sprintf \"$opt::d\""; $/ = eval $e; } - if(defined $opt::p) { $Global::interactive = $opt::p; } - if(defined $opt::q) { $Global::quoting = 1; } - if(defined $opt::r) { $Global::ignore_empty = 1; } - if(defined $opt::verbose) { $Global::stderr_verbose = 1; } - # Deal with --rpl - sub rpl { - # Modify %Global::rpl - # Replace $old with $new - my ($old,$new) = @_; - if($old ne $new) { - $Global::rpl{$new} = $Global::rpl{$old}; - delete $Global::rpl{$old}; - } - } - if(defined $opt::parens) { $Global::parens = $opt::parens; } - my $parenslen = 0.5*length $Global::parens; - $Global::parensleft = substr($Global::parens,0,$parenslen); - $Global::parensright = substr($Global::parens,$parenslen); - if(defined $opt::plus) { %Global::rpl = (%Global::plus,%Global::rpl); } - if(defined $opt::I) { rpl('{}',$opt::I); } - if(defined $opt::U) { rpl('{.}',$opt::U); } - if(defined $opt::i and $opt::i) { rpl('{}',$opt::i); } - if(defined $opt::basenamereplace) { rpl('{/}',$opt::basenamereplace); } - if(defined $opt::dirnamereplace) { rpl('{//}',$opt::dirnamereplace); } - if(defined $opt::seqreplace) { rpl('{#}',$opt::seqreplace); } - if(defined $opt::slotreplace) { rpl('{%}',$opt::slotreplace); } - if(defined $opt::basenameextensionreplace) { - rpl('{/.}',$opt::basenameextensionreplace); - } - for(@opt::rpl) { - # Create $Global::rpl entries for --rpl options - # E.g: "{..} s:\.[^.]+$:;s:\.[^.]+$:;" - my ($shorthand,$long) = split/ /,$_,2; - $Global::rpl{$shorthand} = $long; - } - if(defined $opt::eof) { $Global::end_of_file_string = $opt::eof; } - if(defined $opt::max_args) { $Global::max_number_of_args = $opt::max_args; } - if(defined $opt::timeout) { $Global::timeoutq = TimeoutQueue->new($opt::timeout); } - if(defined $opt::tmpdir) { $ENV{'TMPDIR'} = $opt::tmpdir; } - if(defined $opt::help) { die_usage(); } - if(defined $opt::colsep) { $Global::trim = 'lr'; } - if(defined $opt::header) { $opt::colsep = defined $opt::colsep ? $opt::colsep : "\t"; } - if(defined $opt::trim) { $Global::trim = $opt::trim; } - if(defined $opt::arg_sep) { $Global::arg_sep = $opt::arg_sep; } - if(defined $opt::arg_file_sep) { $Global::arg_file_sep = $opt::arg_file_sep; } - if(defined $opt::number_of_cpus) { print SSHLogin::no_of_cpus(),"\n"; wait_and_exit(0); } - if(defined $opt::number_of_cores) { - print SSHLogin::no_of_cores(),"\n"; wait_and_exit(0); - } - if(defined $opt::max_line_length_allowed) { - print Limits::Command::real_max_length(),"\n"; wait_and_exit(0); - } - if(defined $opt::version) { version(); wait_and_exit(0); } - if(defined $opt::bibtex) { bibtex(); wait_and_exit(0); } - if(defined $opt::record_env) { record_env(); wait_and_exit(0); } - if(defined $opt::show_limits) { show_limits(); } - if(@opt::sshlogin) { @Global::sshlogin = @opt::sshlogin; } - if(@opt::sshloginfile) { read_sshloginfiles(@opt::sshloginfile); } - if(@opt::return) { push @Global::ret_files, @opt::return; } - if(not defined $opt::recstart and - not defined $opt::recend) { $opt::recend = "\n"; } - if(not defined $opt::blocksize) { $opt::blocksize = "1M"; } - $opt::blocksize = multiply_binary_prefix($opt::blocksize); - if(defined $opt::controlmaster) { $opt::noctrlc = 1; } - if(defined $opt::semaphore) { $Global::semaphore = 1; } - if(defined $opt::semaphoretimeout) { $Global::semaphore = 1; } - if(defined $opt::semaphorename) { $Global::semaphore = 1; } - if(defined $opt::fg) { $Global::semaphore = 1; } - if(defined $opt::bg) { $Global::semaphore = 1; } - if(defined $opt::wait) { $Global::semaphore = 1; } - if(defined $opt::halt_on_error and - $opt::halt_on_error=~/%/) { $opt::halt_on_error /= 100; } - if(defined $opt::timeout and $opt::timeout !~ /^\d+(\.\d+)?%?$/) { - ::error("--timeout must be seconds or percentage\n"); - wait_and_exit(255); - } - if(defined $opt::minversion) { - print $Global::version,"\n"; - if($Global::version < $opt::minversion) { - wait_and_exit(255); - } else { - wait_and_exit(0); - } - } - if(not defined $opt::delay) { - # Set --delay to --sshdelay if not set - $opt::delay = $opt::sshdelay; - } - if($opt::compress_program) { - $opt::compress = 1; - $opt::decompress_program ||= $opt::compress_program." -dc"; - } - if($opt::compress) { - my ($compress, $decompress) = find_compression_program(); - $opt::compress_program ||= $compress; - $opt::decompress_program ||= $decompress; - } - if(defined $opt::nonall) { - # Append a dummy empty argument - push @ARGV, $Global::arg_sep, ""; - } - if(defined $opt::tty) { - # Defaults for --tty: -j1 -u - # Can be overridden with -jXXX -g - if(not defined $opt::jobs) { - $opt::jobs = 1; - } - if(not defined $opt::group) { - $opt::ungroup = 0; - } - } - if(@opt::trc) { - push @Global::ret_files, @opt::trc; - $opt::transfer = 1; - $opt::cleanup = 1; - } - if(defined $opt::max_lines) { - if($opt::max_lines eq "-0") { - # -l -0 (swallowed -0) - $opt::max_lines = 1; - $opt::0 = 1; - $/ = "\0"; - } elsif ($opt::max_lines == 0) { - # If not given (or if 0 is given) => 1 - $opt::max_lines = 1; - } - $Global::max_lines = $opt::max_lines; - if(not $opt::pipe) { - # --pipe -L means length of record - not max_number_of_args - $Global::max_number_of_args ||= $Global::max_lines; - } - } - - # Read more than one arg at a time (-L, -N) - if(defined $opt::L) { - $Global::max_lines = $opt::L; - if(not $opt::pipe) { - # --pipe -L means length of record - not max_number_of_args - $Global::max_number_of_args ||= $Global::max_lines; - } - } - if(defined $opt::max_replace_args) { - $Global::max_number_of_args = $opt::max_replace_args; - $Global::ContextReplace = 1; - } - if((defined $opt::L or defined $opt::max_replace_args) - and - not ($opt::xargs or $opt::m)) { - $Global::ContextReplace = 1; - } - if(defined $opt::tag and not defined $opt::tagstring) { - $opt::tagstring = "\257<\257>"; # Default = {} - } - if(defined $opt::pipepart and - (defined $opt::L or defined $opt::max_lines - or defined $opt::max_replace_args)) { - ::error("--pipepart is incompatible with --max-replace-args, ", - "--max-lines, and -L.\n"); - wait_and_exit(255); - } - if(grep /^$Global::arg_sep$|^$Global::arg_file_sep$/o, @ARGV) { - # Deal with ::: and :::: - @ARGV=read_args_from_command_line(); - } - - # Semaphore defaults - # Must be done before computing number of processes and max_line_length - # because when running as a semaphore GNU Parallel does not read args - $Global::semaphore ||= ($0 =~ m:(^|/)sem$:); # called as 'sem' - if($Global::semaphore) { - # A semaphore does not take input from neither stdin nor file - @opt::a = ("/dev/null"); - push(@Global::unget_argv, [Arg->new("")]); - $Semaphore::timeout = $opt::semaphoretimeout || 0; - if(defined $opt::semaphorename) { - $Semaphore::name = $opt::semaphorename; - } else { - $Semaphore::name = `tty`; - chomp $Semaphore::name; - } - $Semaphore::fg = $opt::fg; - $Semaphore::wait = $opt::wait; - $Global::default_simultaneous_sshlogins = 1; - if(not defined $opt::jobs) { - $opt::jobs = 1; - } - if($Global::interactive and $opt::bg) { - ::error("Jobs running in the ". - "background cannot be interactive.\n"); - ::wait_and_exit(255); - } - } - if(defined $opt::eta) { - $opt::progress = $opt::eta; - } - if(defined $opt::bar) { - $opt::progress = $opt::bar; - } - if(defined $opt::retired) { - ::error("-g has been retired. Use --group.\n"); - ::error("-B has been retired. Use --bf.\n"); - ::error("-T has been retired. Use --tty.\n"); - ::error("-U has been retired. Use --er.\n"); - ::error("-W has been retired. Use --wd.\n"); - ::error("-Y has been retired. Use --shebang.\n"); - ::error("-H has been retired. Use --halt.\n"); - ::error("--tollef has been retired. Use -u -q --arg-sep -- and --load for -l.\n"); - ::wait_and_exit(255); - } - citation_notice(); - - parse_sshlogin(); - parse_env_var(); - - if(remote_hosts() and ($opt::X or $opt::m or $opt::xargs)) { - # As we do not know the max line length on the remote machine - # long commands generated by xargs may fail - # If opt_N is set, it is probably safe - ::warning("Using -X or -m with --sshlogin may fail.\n"); - } - - if(not defined $opt::jobs) { - $opt::jobs = "100%"; - } - open_joblog(); -} - -sub env_quote { - # Input: - # $v = value to quote - # Returns: - # $v = value quoted as environment variable - my $v = $_[0]; - $v =~ s/([\\])/\\$1/g; - $v =~ s/([\[\] \#\'\&\<\>\(\)\;\{\}\t\"\$\`\*\174\!\?\~])/\\$1/g; - $v =~ s/\n/"\n"/g; - return $v; -} - -sub record_env { - # Record current %ENV-keys in ~/.parallel/ignored_vars - # Returns: N/A - my $ignore_filename = $ENV{'HOME'} . "/.parallel/ignored_vars"; - if(open(my $vars_fh, ">", $ignore_filename)) { - print $vars_fh map { $_,"\n" } keys %ENV; - } else { - ::error("Cannot write to $ignore_filename\n"); - ::wait_and_exit(255); - } -} - -sub parse_env_var { - # Parse --env and set $Global::envvar, $Global::envwarn and $Global::envvarlen - # - # Bash functions must be parsed to export them remotely - # Pre-shellshock style bash function: - # myfunc=() {... - # Post-shellshock style bash function: - # BASH_FUNC_myfunc()=() {... - # - # Uses: - # $Global::envvar = eval string that will set variables in both bash and csh - # $Global::envwarn = If functions are used: Give warning in csh - # $Global::envvarlen = length of $Global::envvar - # @opt::env - # $Global::shell - # %ENV - # Returns: N/A - $Global::envvar = ""; - $Global::envwarn = ""; - my @vars = ('parallel_bash_environment'); - for my $varstring (@opt::env) { - # Split up --env VAR1,VAR2 - push @vars, split /,/, $varstring; - } - if(grep { /^_$/ } @vars) { - # --env _ - # Include all vars that are not in a clean environment - if(open(my $vars_fh, "<", $ENV{'HOME'} . "/.parallel/ignored_vars")) { - my @ignore = <$vars_fh>; - chomp @ignore; - my %ignore; - @ignore{@ignore} = @ignore; - close $vars_fh; - push @vars, grep { not defined $ignore{$_} } keys %ENV; - @vars = grep { not /^_$/ } @vars; - } else { - ::error("Run '$Global::progname --record-env' in a clean environment first.\n"); - ::wait_and_exit(255); - } - } - # Duplicate vars as BASH functions to include post-shellshock functions. - # So --env myfunc should also look for BASH_FUNC_myfunc() - @vars = map { $_, "BASH_FUNC_$_()" } @vars; - # Keep only defined variables - @vars = grep { defined($ENV{$_}) } @vars; - # Pre-shellshock style bash function: - # myfunc=() { echo myfunc - # } - # Post-shellshock style bash function: - # BASH_FUNC_myfunc()=() { echo myfunc - # } - my @bash_functions = grep { substr($ENV{$_},0,4) eq "() {" } @vars; - my @non_functions = grep { substr($ENV{$_},0,4) ne "() {" } @vars; - if(@bash_functions) { - # Functions are not supported for all shells - if($Global::shell !~ m:/(bash|rbash|zsh|rzsh|dash|ksh):) { - ::warning("Shell functions may not be supported in $Global::shell\n"); - } - } - - # Pre-shellschock names are without () - my @bash_pre_shellshock = grep { not /\(\)/ } @bash_functions; - # Post-shellschock names are with () - my @bash_post_shellshock = grep { /\(\)/ } @bash_functions; - - my @qcsh = (map { my $a=$_; "setenv $a " . env_quote($ENV{$a}) } - grep { not /^parallel_bash_environment$/ } @non_functions); - my @qbash = (map { my $a=$_; "export $a=" . env_quote($ENV{$a}) } - @non_functions, @bash_pre_shellshock); - - push @qbash, map { my $a=$_; "eval $a\"\$$a\"" } @bash_pre_shellshock; - push @qbash, map { /BASH_FUNC_(.*)\(\)/; "$1 $ENV{$_}" } @bash_post_shellshock; - - #ssh -tt -oLogLevel=quiet lo 'eval `echo PARALLEL_SEQ='$PARALLEL_SEQ'\;export PARALLEL_SEQ\; PARALLEL_PID='$PARALLEL_PID'\;export PARALLEL_PID` ;' tty\ \>/dev/null\ \&\&\ stty\ isig\ -onlcr\ -echo\;echo\ \$SHELL\ \|\ grep\ \"/t\\\{0,1\\\}csh\"\ \>\ /dev/null\ \&\&\ setenv\ BASH_FUNC_myfunc\ \\\(\\\)\\\ \\\{\\\ \\\ echo\\\ a\"' - #'\"\\\}\ \|\|\ myfunc\(\)\ \{\ \ echo\ a' - #'\}\ \;myfunc\ 1; - - # Check if any variables contain \n - if(my @v = map { s/BASH_FUNC_(.*)\(\)/$1/; $_ } grep { $ENV{$_}=~/\n/ } @vars) { - # \n is bad for csh and will cause it to fail. - $Global::envwarn = ::shell_quote_scalar(q{echo $SHELL | egrep "/t?csh" > /dev/null && echo CSH/TCSH DO NOT SUPPORT newlines IN VARIABLES/FUNCTIONS. Unset }."@v".q{ && exec false;}."\n\n") . $Global::envwarn; - } - - if(not @qcsh) { push @qcsh, "true"; } - if(not @qbash) { push @qbash, "true"; } - # Create lines like: - # echo $SHELL | grep "/t\\{0,1\\}csh" >/dev/null && setenv V1 val1 && setenv V2 val2 || export V1=val1 && export V2=val2 ; echo "$V1$V2" - if(@vars) { - $Global::envvar .= - join"", - (q{echo $SHELL | grep "/t\\{0,1\\}csh" > /dev/null && } - . join(" && ", @qcsh) - . q{ || } - . join(" && ", @qbash) - .q{;}); - if($ENV{'parallel_bash_environment'}) { - $Global::envvar .= 'eval "$parallel_bash_environment";'."\n"; - } - } - $Global::envvarlen = length $Global::envvar; -} - -sub open_joblog { - # Open joblog as specified by --joblog - # Uses: - # $opt::resume - # $opt::resume_failed - # $opt::joblog - # $opt::results - # $Global::job_already_run - # %Global::fd - my $append = 0; - if(($opt::resume or $opt::resume_failed) - and - not ($opt::joblog or $opt::results)) { - ::error("--resume and --resume-failed require --joblog or --results.\n"); - ::wait_and_exit(255); - } - if($opt::joblog) { - if($opt::resume || $opt::resume_failed) { - if(open(my $joblog_fh, "<", $opt::joblog)) { - # Read the joblog - $append = <$joblog_fh>; # If there is a header: Open as append later - my $joblog_regexp; - if($opt::resume_failed) { - # Make a regexp that only matches commands with exit+signal=0 - # 4 host 1360490623.067 3.445 1023 1222 0 0 command - $joblog_regexp='^(\d+)(?:\t[^\t]+){5}\t0\t0\t'; - } else { - # Just match the job number - $joblog_regexp='^(\d+)'; - } - while(<$joblog_fh>) { - if(/$joblog_regexp/o) { - # This is 30% faster than set_job_already_run($1); - vec($Global::job_already_run,($1||0),1) = 1; - } elsif(not /\d+\s+[^\s]+\s+([0-9.]+\s+){6}/) { - ::error("Format of '$opt::joblog' is wrong: $_"); - ::wait_and_exit(255); - } - } - close $joblog_fh; - } - } - if($append) { - # Append to joblog - if(not open($Global::joblog, ">>", $opt::joblog)) { - ::error("Cannot append to --joblog $opt::joblog.\n"); - ::wait_and_exit(255); - } - } else { - if($opt::joblog eq "-") { - # Use STDOUT as joblog - $Global::joblog = $Global::fd{1}; - } elsif(not open($Global::joblog, ">", $opt::joblog)) { - # Overwrite the joblog - ::error("Cannot write to --joblog $opt::joblog.\n"); - ::wait_and_exit(255); - } - print $Global::joblog - join("\t", "Seq", "Host", "Starttime", "JobRuntime", - "Send", "Receive", "Exitval", "Signal", "Command" - ). "\n"; - } - } -} - -sub find_compression_program { - # Find a fast compression program - # Returns: - # $compress_program = compress program with options - # $decompress_program = decompress program with options - - # Search for these. Sorted by speed - my @prg = qw(lzop pigz pxz gzip plzip pbzip2 lzma xz lzip bzip2); - for my $p (@prg) { - if(which($p)) { - return ("$p -c -1","$p -dc"); - } - } - # Fall back to cat - return ("cat","cat"); -} - - -sub read_options { - # Read options from command line, profile and $PARALLEL - # Uses: - # $opt::shebang_wrap - # $opt::shebang - # @ARGV - # $opt::plain - # @opt::profile - # $ENV{'HOME'} - # $ENV{'PARALLEL'} - # Returns: - # @ARGV_no_opt = @ARGV without --options - - # This must be done first as this may exec myself - if(defined $ARGV[0] and ($ARGV[0] =~ /^--shebang/ or - $ARGV[0] =~ /^--shebang-?wrap/ or - $ARGV[0] =~ /^--hashbang/)) { - # Program is called from #! line in script - # remove --shebang-wrap if it is set - $opt::shebang_wrap = ($ARGV[0] =~ s/^--shebang-?wrap *//); - # remove --shebang if it is set - $opt::shebang = ($ARGV[0] =~ s/^--shebang *//); - # remove --hashbang if it is set - $opt::shebang .= ($ARGV[0] =~ s/^--hashbang *//); - if($opt::shebang) { - my $argfile = shell_quote_scalar(pop @ARGV); - # exec myself to split $ARGV[0] into separate fields - exec "$0 --skip-first-line -a $argfile @ARGV"; - } - if($opt::shebang_wrap) { - my @options; - my @parser; - if ($^O eq 'freebsd') { - # FreeBSD's #! puts different values in @ARGV than Linux' does. - my @nooptions = @ARGV; - get_options_from_array(\@nooptions); - while($#ARGV > $#nooptions) { - push @options, shift @ARGV; - } - while(@ARGV and $ARGV[0] ne ":::") { - push @parser, shift @ARGV; - } - if(@ARGV and $ARGV[0] eq ":::") { - shift @ARGV; - } - } else { - @options = shift @ARGV; - } - my $script = shell_quote_scalar(shift @ARGV); - # exec myself to split $ARGV[0] into separate fields - exec "$0 --internal-pipe-means-argfiles @options @parser $script ::: @ARGV"; - } - } - - Getopt::Long::Configure("bundling","require_order"); - my @ARGV_copy = @ARGV; - # Check if there is a --profile to set @opt::profile - get_options_from_array(\@ARGV_copy,"profile|J=s","plain") || die_usage(); - my @ARGV_profile = (); - my @ARGV_env = (); - if(not $opt::plain) { - # Add options from .parallel/config and other profiles - my @config_profiles = ( - "/etc/parallel/config", - $ENV{'HOME'}."/.parallel/config", - $ENV{'HOME'}."/.parallelrc"); - my @profiles = @config_profiles; - if(@opt::profile) { - # --profile overrides default profiles - @profiles = (); - for my $profile (@opt::profile) { - if(-r $profile) { - push @profiles, $profile; - } else { - push @profiles, $ENV{'HOME'}."/.parallel/".$profile; - } - } - } - for my $profile (@profiles) { - if(-r $profile) { - open (my $in_fh, "<", $profile) || ::die_bug("read-profile: $profile"); - while(<$in_fh>) { - /^\s*\#/ and next; - chomp; - push @ARGV_profile, shellwords($_); - } - close $in_fh; - } else { - if(grep /^$profile$/, @config_profiles) { - # config file is not required to exist - } else { - ::error("$profile not readable.\n"); - wait_and_exit(255); - } - } - } - # Add options from shell variable $PARALLEL - if($ENV{'PARALLEL'}) { - @ARGV_env = shellwords($ENV{'PARALLEL'}); - } - } - Getopt::Long::Configure("bundling","require_order"); - get_options_from_array(\@ARGV_profile) || die_usage(); - get_options_from_array(\@ARGV_env) || die_usage(); - get_options_from_array(\@ARGV) || die_usage(); - - # Prepend non-options to @ARGV (such as commands like 'nice') - unshift @ARGV, @ARGV_profile, @ARGV_env; - return @ARGV; -} - -sub read_args_from_command_line { - # Arguments given on the command line after: - # ::: ($Global::arg_sep) - # :::: ($Global::arg_file_sep) - # Removes the arguments from @ARGV and: - # - puts filenames into -a - # - puts arguments into files and add the files to -a - # Input: - # @::ARGV = command option ::: arg arg arg :::: argfiles - # Uses: - # $Global::arg_sep - # $Global::arg_file_sep - # $opt::internal_pipe_means_argfiles - # $opt::pipe - # @opt::a - # Returns: - # @argv_no_argsep = @::ARGV without ::: and :::: and following args - my @new_argv = (); - for(my $arg = shift @ARGV; @ARGV; $arg = shift @ARGV) { - if($arg eq $Global::arg_sep - or - $arg eq $Global::arg_file_sep) { - my $group = $arg; # This group of arguments is args or argfiles - my @group; - while(defined ($arg = shift @ARGV)) { - if($arg eq $Global::arg_sep - or - $arg eq $Global::arg_file_sep) { - # exit while loop if finding new separator - last; - } else { - # If not hitting ::: or :::: - # Append it to the group - push @group, $arg; - } - } - - if($group eq $Global::arg_file_sep - or ($opt::internal_pipe_means_argfiles and $opt::pipe) - ) { - # Group of file names on the command line. - # Append args into -a - push @opt::a, @group; - } elsif($group eq $Global::arg_sep) { - # Group of arguments on the command line. - # Put them into a file. - # Create argfile - my ($outfh,$name) = ::tmpfile(SUFFIX => ".arg"); - unlink($name); - # Put args into argfile - print $outfh map { $_,$/ } @group; - seek $outfh, 0, 0; - # Append filehandle to -a - push @opt::a, $outfh; - } else { - ::die_bug("Unknown command line group: $group"); - } - if(defined($arg)) { - # $arg is ::: or :::: - redo; - } else { - # $arg is undef -> @ARGV empty - last; - } - } - push @new_argv, $arg; - } - # Output: @ARGV = command to run with options - return @new_argv; -} - -sub cleanup { - # Returns: N/A - if(@opt::basefile) { cleanup_basefile(); } -} - -sub __QUOTING_ARGUMENTS_FOR_SHELL__ {} - -sub shell_quote { - # Input: - # @strings = strings to be quoted - # Output: - # @shell_quoted_strings = string quoted with \ as needed by the shell - my @strings = (@_); - for my $a (@strings) { - $a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\*\>\<\~\|\; \"\!\$\&\'\202-\377])/\\$1/g; - $a =~ s/[\n]/'\n'/g; # filenames with '\n' is quoted using \' - } - return wantarray ? @strings : "@strings"; -} - -sub shell_quote_empty { - # Inputs: - # @strings = strings to be quoted - # Returns: - # @quoted_strings = empty strings quoted as ''. - my @strings = shell_quote(@_); - for my $a (@strings) { - if($a eq "") { - $a = "''"; - } - } - return wantarray ? @strings : "@strings"; -} - -sub shell_quote_scalar { - # Quote the string so shell will not expand any special chars - # Inputs: - # $string = string to be quoted - # Returns: - # $shell_quoted = string quoted with \ as needed by the shell - my $a = $_[0]; - if(defined $a) { - # $a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\*\>\<\~\|\; \"\!\$\&\'\202-\377])/\\$1/g; - # This is 1% faster than the above - $a =~ s/[\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\*\>\<\~\|\; \"\!\$\&\'\202-\377]/\\$&/go; - $a =~ s/[\n]/'\n'/go; # filenames with '\n' is quoted using \' - } - return $a; -} - -sub shell_quote_file { - # Quote the string so shell will not expand any special chars and prepend ./ if needed - # Input: - # $filename = filename to be shell quoted - # Returns: - # $quoted_filename = filename quoted with \ as needed by the shell and ./ if needed - my $a = shell_quote_scalar(shift); - if(defined $a) { - if($a =~ m:^/: or $a =~ m:^\./:) { - # /abs/path or ./rel/path => skip - } else { - # rel/path => ./rel/path - $a = "./".$a; - } - } - return $a; -} - -sub shellwords { - # Input: - # $string = shell line - # Returns: - # @shell_words = $string split into words as shell would do - $Global::use{"Text::ParseWords"} ||= eval "use Text::ParseWords; 1;"; - return Text::ParseWords::shellwords(@_); -} - - -sub __FILEHANDLES__ {} - - -sub save_stdin_stdout_stderr { - # Remember the original STDIN, STDOUT and STDERR - # and file descriptors opened by the shell (e.g. 3>/tmp/foo) - # Uses: - # %Global::fd - # $Global::original_stderr - # $Global::original_stdin - # Returns: N/A - - # Find file descriptors that are already opened (by the shell) - for my $fdno (1..61) { - # /dev/fd/62 and above are used by bash for <(cmd) - my $fh; - # 2-argument-open is used to be compatible with old perl 5.8.0 - # bug #43570: Perl 5.8.0 creates 61 files - if(open($fh,">&=$fdno")) { - $Global::fd{$fdno}=$fh; - } - } - open $Global::original_stderr, ">&", "STDERR" or - ::die_bug("Can't dup STDERR: $!"); - open $Global::original_stdin, "<&", "STDIN" or - ::die_bug("Can't dup STDIN: $!"); - $Global::is_terminal = (-t $Global::original_stderr) && !$ENV{'CIRCLECI'} && !$ENV{'TRAVIS'}; -} - -sub enough_file_handles { - # Check that we have enough filehandles available for starting - # another job - # Uses: - # $opt::ungroup - # %Global::fd - # Returns: - # 1 if ungrouped (thus not needing extra filehandles) - # 0 if too few filehandles - # 1 if enough filehandles - if(not $opt::ungroup) { - my %fh; - my $enough_filehandles = 1; - # perl uses 7 filehandles for something? - # open3 uses 2 extra filehandles temporarily - # We need a filehandle for each redirected file descriptor - # (normally just STDOUT and STDERR) - for my $i (1..(7+2+keys %Global::fd)) { - $enough_filehandles &&= open($fh{$i}, "<", "/dev/null"); - } - for (values %fh) { close $_; } - return $enough_filehandles; - } else { - # Ungrouped does not need extra file handles - return 1; - } -} - -sub open_or_exit { - # Open a file name or exit if the file cannot be opened - # Inputs: - # $file = filehandle or filename to open - # Uses: - # $Global::stdin_in_opt_a - # $Global::original_stdin - # Returns: - # $fh = file handle to read-opened file - my $file = shift; - if($file eq "-") { - $Global::stdin_in_opt_a = 1; - return ($Global::original_stdin || *STDIN); - } - if(ref $file eq "GLOB") { - # This is an open filehandle - return $file; - } - my $fh = gensym; - if(not open($fh, "<", $file)) { - ::error("Cannot open input file `$file': No such file or directory.\n"); - wait_and_exit(255); - } - return $fh; -} - -sub __RUNNING_THE_JOBS_AND_PRINTING_PROGRESS__ {} - -# Variable structure: -# -# $Global::running{$pid} = Pointer to Job-object -# @Global::virgin_jobs = Pointer to Job-object that have received no input -# $Global::host{$sshlogin} = Pointer to SSHLogin-object -# $Global::total_running = total number of running jobs -# $Global::total_started = total jobs started - -sub init_run_jobs { - $Global::total_running = 0; - $Global::total_started = 0; - $Global::tty_taken = 0; - $SIG{USR1} = \&list_running_jobs; - $SIG{USR2} = \&toggle_progress; - if(@opt::basefile) { setup_basefile(); } -} - -{ - my $last_time; - my %last_mtime; - -sub start_more_jobs { - # Run start_another_job() but only if: - # * not $Global::start_no_new_jobs set - # * not JobQueue is empty - # * not load on server is too high - # * not server swapping - # * not too short time since last remote login - # Uses: - # $Global::max_procs_file - # $Global::max_procs_file_last_mod - # %Global::host - # @opt::sshloginfile - # $Global::start_no_new_jobs - # $opt::filter_hosts - # $Global::JobQueue - # $opt::pipe - # $opt::load - # $opt::noswap - # $opt::delay - # $Global::newest_starttime - # Returns: - # $jobs_started = number of jobs started - my $jobs_started = 0; - my $jobs_started_this_round = 0; - if($Global::start_no_new_jobs) { - return $jobs_started; - } - if(time - ($last_time||0) > 1) { - # At most do this every second - $last_time = time; - if($Global::max_procs_file) { - # --jobs filename - my $mtime = (stat($Global::max_procs_file))[9]; - if($mtime > $Global::max_procs_file_last_mod) { - # file changed: Force re-computing max_jobs_running - $Global::max_procs_file_last_mod = $mtime; - for my $sshlogin (values %Global::host) { - $sshlogin->set_max_jobs_running(undef); - } - } - } - if(@opt::sshloginfile) { - # Is --sshloginfile changed? - for my $slf (@opt::sshloginfile) { - my $actual_file = expand_slf_shorthand($slf); - my $mtime = (stat($actual_file))[9]; - $last_mtime{$actual_file} ||= $mtime; - if($mtime - $last_mtime{$actual_file} > 1) { - ::debug("run","--sshloginfile $actual_file changed. reload\n"); - $last_mtime{$actual_file} = $mtime; - # Reload $slf - # Empty sshlogins - @Global::sshlogin = (); - for (values %Global::host) { - # Don't start new jobs on any host - # except the ones added back later - $_->set_max_jobs_running(0); - } - # This will set max_jobs_running on the SSHlogins - read_sshloginfile($actual_file); - parse_sshlogin(); - $opt::filter_hosts and filter_hosts(); - setup_basefile(); - } - } - } - } - do { - $jobs_started_this_round = 0; - # This will start 1 job on each --sshlogin (if possible) - # thus distribute the jobs on the --sshlogins round robin - - for my $sshlogin (values %Global::host) { - if($Global::JobQueue->empty() and not $opt::pipe) { - # No more jobs in the queue - last; - } - debug("run", "Running jobs before on ", $sshlogin->string(), ": ", - $sshlogin->jobs_running(), "\n"); - if ($sshlogin->jobs_running() < $sshlogin->max_jobs_running()) { - if($opt::load and $sshlogin->loadavg_too_high()) { - # The load is too high or unknown - next; - } - if($opt::noswap and $sshlogin->swapping()) { - # The server is swapping - next; - } - if($sshlogin->too_fast_remote_login()) { - # It has been too short since - next; - } - if($opt::delay and $opt::delay > ::now() - $Global::newest_starttime) { - # It has been too short since last start - next; - } - debug("run", $sshlogin->string(), " has ", $sshlogin->jobs_running(), - " out of ", $sshlogin->max_jobs_running(), - " jobs running. Start another.\n"); - if(start_another_job($sshlogin) == 0) { - # No more jobs to start on this $sshlogin - debug("run","No jobs started on ", $sshlogin->string(), "\n"); - next; - } - $sshlogin->inc_jobs_running(); - $sshlogin->set_last_login_at(::now()); - $jobs_started++; - $jobs_started_this_round++; - } - debug("run","Running jobs after on ", $sshlogin->string(), ": ", - $sshlogin->jobs_running(), " of ", - $sshlogin->max_jobs_running(), "\n"); - } - } while($jobs_started_this_round); - - return $jobs_started; -} -} - -{ - my $no_more_file_handles_warned; - -sub start_another_job { - # If there are enough filehandles - # and JobQueue not empty - # and not $job is in joblog - # Then grab a job from Global::JobQueue, - # start it at sshlogin - # mark it as virgin_job - # Inputs: - # $sshlogin = the SSHLogin to start the job on - # Uses: - # $Global::JobQueue - # $opt::pipe - # $opt::results - # $opt::resume - # @Global::virgin_jobs - # Returns: - # 1 if another jobs was started - # 0 otherwise - my $sshlogin = shift; - # Do we have enough file handles to start another job? - if(enough_file_handles()) { - if($Global::JobQueue->empty() and not $opt::pipe) { - # No more commands to run - debug("start", "Not starting: JobQueue empty\n"); - return 0; - } else { - my $job; - # Skip jobs already in job log - # Skip jobs already in results - do { - $job = get_job_with_sshlogin($sshlogin); - if(not defined $job) { - # No command available for that sshlogin - debug("start", "Not starting: no jobs available for ", - $sshlogin->string(), "\n"); - return 0; - } - } while ($job->is_already_in_joblog() - or - ($opt::results and $opt::resume and $job->is_already_in_results())); - debug("start", "Command to run on '", $job->sshlogin()->string(), "': '", - $job->replaced(),"'\n"); - if($job->start()) { - if($opt::pipe) { - push(@Global::virgin_jobs,$job); - } - debug("start", "Started as seq ", $job->seq(), - " pid:", $job->pid(), "\n"); - return 1; - } else { - # Not enough processes to run the job. - # Put it back on the queue. - $Global::JobQueue->unget($job); - # Count down the number of jobs to run for this SSHLogin. - my $max = $sshlogin->max_jobs_running(); - if($max > 1) { $max--; } else { - ::error("No more processes: cannot run a single job. Something is wrong.\n"); - ::wait_and_exit(255); - } - $sshlogin->set_max_jobs_running($max); - # Sleep up to 300 ms to give other processes time to die - ::usleep(rand()*300); - ::warning("No more processes: ", - "Decreasing number of running jobs to $max. ", - "Raising ulimit -u or /etc/security/limits.conf may help.\n"); - return 0; - } - } - } else { - # No more file handles - $no_more_file_handles_warned++ or - ::warning("No more file handles. ", - "Raising ulimit -n or /etc/security/limits.conf may help.\n"); - return 0; - } -} -} - -$opt::min_progress_interval = 0; - -sub init_progress { - # Uses: - # $opt::bar - # Returns: - # list of computers for progress output - $|=1; - if (not $Global::is_terminal) { - $opt::min_progress_interval = 30; - } - if($opt::bar) { - return("",""); - } - my %progress = progress(); - return ("\nComputers / CPU cores / Max jobs to run\n", - $progress{'workerlist'}); -} - -sub drain_job_queue { - # Uses: - # $opt::progress - # $Global::original_stderr - # $Global::total_running - # $Global::max_jobs_running - # %Global::running - # $Global::JobQueue - # %Global::host - # $Global::start_no_new_jobs - # Returns: N/A - if($opt::progress) { - print $Global::original_stderr init_progress(); - } - my $last_header=""; - my $sleep = 0.2; - my $last_left = 1000000000; - my $last_progress_time = 0; - my $ps_reported = 0; - do { - while($Global::total_running > 0) { - debug($Global::total_running, "==", scalar - keys %Global::running," slots: ", $Global::max_jobs_running); - if($opt::pipe) { - # When using --pipe sometimes file handles are not closed properly - for my $job (values %Global::running) { - close $job->fh(0,"w"); - } - } - # When not connected to terminal, assume CI (e.g. CircleCI). In - # that case we want occasional progress output to prevent abort - # due to timeout with no output, but we also need to stop sending - # progress output if there has been no actual progress, so that - # the job can time out appropriately (CirecleCI: 10m) in case of - # a hung test. But without special output, it is extremely - # annoying to diagnose which test is hung, so we add that using - # `ps` below. - if($opt::progress and - ($Global::is_terminal or (time() - $last_progress_time) >= 30)) { - my %progress = progress(); - if($last_header ne $progress{'header'}) { - print $Global::original_stderr "\n", $progress{'header'}, "\n"; - $last_header = $progress{'header'}; - } - if ($Global::is_terminal) { - print $Global::original_stderr "\r",$progress{'status'}; - } - if ($last_left > $Global::left) { - if (not $Global::is_terminal) { - print $Global::original_stderr $progress{'status'},"\n"; - } - $last_progress_time = time(); - $ps_reported = 0; - } elsif (not $ps_reported and (time() - $last_progress_time) >= 60) { - # No progress in at least 60 seconds: run ps - print $Global::original_stderr "\n"; - system("ps", "-wf"); - $ps_reported = 1; - } - $last_left = $Global::left; - flush $Global::original_stderr; - } - if($Global::total_running < $Global::max_jobs_running - and not $Global::JobQueue->empty()) { - # These jobs may not be started because of loadavg - # or too little time between each ssh login. - if(start_more_jobs() > 0) { - # Exponential back-on if jobs were started - $sleep = $sleep/2+0.001; - } - } - # Sometimes SIGCHLD is not registered, so force reaper - $sleep = ::reap_usleep($sleep); - } - if(not $Global::JobQueue->empty()) { - # These jobs may not be started: - # * because there the --filter-hosts has removed all - if(not %Global::host) { - ::error("There are no hosts left to run on.\n"); - ::wait_and_exit(255); - } - # * because of loadavg - # * because of too little time between each ssh login. - start_more_jobs(); - $sleep = ::reap_usleep($sleep); - if($Global::max_jobs_running == 0) { - ::warning("There are no job slots available. Increase --jobs.\n"); - } - } - } while ($Global::total_running > 0 - or - not $Global::start_no_new_jobs and not $Global::JobQueue->empty()); - if($opt::progress) { - my %progress = progress(); - print $Global::original_stderr $opt::progress_sep, $progress{'status'}, "\n"; - flush $Global::original_stderr; - } -} - -sub toggle_progress { - # Turn on/off progress view - # Uses: - # $opt::progress - # $Global::original_stderr - # Returns: N/A - $opt::progress = not $opt::progress; - if($opt::progress) { - print $Global::original_stderr init_progress(); - } -} - -sub progress { - # Uses: - # $opt::bar - # $opt::eta - # %Global::host - # $Global::total_started - # Returns: - # $workerlist = list of workers - # $header = that will fit on the screen - # $status = message that will fit on the screen - if($opt::bar) { - return ("workerlist" => "", "header" => "", "status" => bar()); - } - my $eta = ""; - my ($status,$header)=("",""); - if($opt::eta) { - my($total, $completed, $left, $pctcomplete, $avgtime, $this_eta) = - compute_eta(); - $eta = sprintf("ETA: %ds Left: %d AVG: %.2fs ", - $this_eta, $left, $avgtime); - $Global::left = $left; - } - my $termcols = terminal_columns(); - my @workers = sort keys %Global::host; - my %sshlogin = map { $_ eq ":" ? ($_=>"local") : ($_=>$_) } @workers; - my $workerno = 1; - my %workerno = map { ($_=>$workerno++) } @workers; - my $workerlist = ""; - for my $w (@workers) { - $workerlist .= - $workerno{$w}.":".$sshlogin{$w} ." / ". - ($Global::host{$w}->ncpus() || "-")." / ". - $Global::host{$w}->max_jobs_running()."\n"; - } - $status = "x"x($termcols+1); - if(length $status > $termcols) { - # sshlogin1:XX/XX/XX%/XX.Xs sshlogin2:XX/XX/XX%/XX.Xs sshlogin3:XX/XX/XX%/XX.Xs - $header = "Computer:jobs running/jobs completed/%of started jobs/Average seconds to complete"; - $status = $eta . - join(" ",map - { - if($Global::total_started) { - my $completed = ($Global::host{$_}->jobs_completed()||0); - my $running = $Global::host{$_}->jobs_running(); - my $time = $completed ? (time-$^T)/($completed) : "0"; - sprintf("%s:%d/%d/%d%%/%.1fs ", - $sshlogin{$_}, $running, $completed, - ($running+$completed)*100 - / $Global::total_started, $time); - } - } @workers); - } - if(length $status > $termcols) { - # 1:XX/XX/XX%/XX.Xs 2:XX/XX/XX%/XX.Xs 3:XX/XX/XX%/XX.Xs 4:XX/XX/XX%/XX.Xs - $header = "Computer:jobs running/jobs completed/%of started jobs"; - $status = $eta . - join(" ",map - { - my $completed = ($Global::host{$_}->jobs_completed()||0); - my $running = $Global::host{$_}->jobs_running(); - my $time = $completed ? (time-$^T)/($completed) : "0"; - sprintf("%s:%d/%d/%d%%/%.1fs ", - $workerno{$_}, $running, $completed, - ($running+$completed)*100 - / $Global::total_started, $time); - } @workers); - } - if(length $status > $termcols) { - # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX/XX% - $header = "Computer:jobs running/jobs completed/%of started jobs"; - $status = $eta . - join(" ",map - { sprintf("%s:%d/%d/%d%%", - $sshlogin{$_}, - $Global::host{$_}->jobs_running(), - ($Global::host{$_}->jobs_completed()||0), - ($Global::host{$_}->jobs_running()+ - ($Global::host{$_}->jobs_completed()||0))*100 - / $Global::total_started) } - @workers); - } - if(length $status > $termcols) { - # 1:XX/XX/XX% 2:XX/XX/XX% 3:XX/XX/XX% 4:XX/XX/XX% 5:XX/XX/XX% 6:XX/XX/XX% - $header = "Computer:jobs running/jobs completed/%of started jobs"; - $status = $eta . - join(" ",map - { sprintf("%s:%d/%d/%d%%", - $workerno{$_}, - $Global::host{$_}->jobs_running(), - ($Global::host{$_}->jobs_completed()||0), - ($Global::host{$_}->jobs_running()+ - ($Global::host{$_}->jobs_completed()||0))*100 - / $Global::total_started) } - @workers); - } - if(length $status > $termcols) { - # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX sshlogin4:XX/XX - $header = "Computer:jobs running/jobs completed"; - $status = $eta . - join(" ",map - { sprintf("%s:%d/%d", - $sshlogin{$_}, $Global::host{$_}->jobs_running(), - ($Global::host{$_}->jobs_completed()||0)) } - @workers); - } - if(length $status > $termcols) { - # sshlogin1:XX/XX sshlogin2:XX/XX sshlogin3:XX/XX sshlogin4:XX/XX - $header = "Computer:jobs running/jobs completed"; - $status = $eta . - join(" ",map - { sprintf("%s:%d/%d", - $sshlogin{$_}, $Global::host{$_}->jobs_running(), - ($Global::host{$_}->jobs_completed()||0)) } - @workers); - } - if(length $status > $termcols) { - # 1:XX/XX 2:XX/XX 3:XX/XX 4:XX/XX 5:XX/XX 6:XX/XX - $header = "Computer:jobs running/jobs completed"; - $status = $eta . - join(" ",map - { sprintf("%s:%d/%d", - $workerno{$_}, $Global::host{$_}->jobs_running(), - ($Global::host{$_}->jobs_completed()||0)) } - @workers); - } - if(length $status > $termcols) { - # sshlogin1:XX sshlogin2:XX sshlogin3:XX sshlogin4:XX sshlogin5:XX - $header = "Computer:jobs completed"; - $status = $eta . - join(" ",map - { sprintf("%s:%d", - $sshlogin{$_}, - ($Global::host{$_}->jobs_completed()||0)) } - @workers); - } - if(length $status > $termcols) { - # 1:XX 2:XX 3:XX 4:XX 5:XX 6:XX - $header = "Computer:jobs completed"; - $status = $eta . - join(" ",map - { sprintf("%s:%d", - $workerno{$_}, - ($Global::host{$_}->jobs_completed()||0)) } - @workers); - } - return ("workerlist" => $workerlist, "header" => $header, "status" => $status); -} - -{ - my ($total, $first_completed, $smoothed_avg_time); - - sub compute_eta { - # Calculate important numbers for ETA - # Returns: - # $total = number of jobs in total - # $completed = number of jobs completed - # $left = number of jobs left - # $pctcomplete = percent of jobs completed - # $avgtime = averaged time - # $eta = smoothed eta - $total ||= $Global::JobQueue->total_jobs(); - my $completed = 0; - for(values %Global::host) { $completed += $_->jobs_completed() } - my $left = $total - $completed; - if(not $completed) { - return($total, $completed, $left, 0, 0, 0); - } - my $pctcomplete = $completed / $total; - $first_completed ||= time; - my $timepassed = (time - $first_completed); - my $avgtime = $timepassed / $completed; - $smoothed_avg_time ||= $avgtime; - # Smooth the eta so it does not jump wildly - $smoothed_avg_time = (1 - $pctcomplete) * $smoothed_avg_time + - $pctcomplete * $avgtime; - my $eta = int($left * $smoothed_avg_time); - return($total, $completed, $left, $pctcomplete, $avgtime, $eta); - } -} - -{ - my ($rev,$reset); - - sub bar { - # Return: - # $status = bar with eta, completed jobs, arg and pct - $rev ||= "\033[7m"; - $reset ||= "\033[0m"; - my($total, $completed, $left, $pctcomplete, $avgtime, $eta) = - compute_eta(); - my $arg = $Global::newest_job ? - $Global::newest_job->{'commandline'}->replace_placeholders(["\257<\257>"],0,0) : ""; - # These chars mess up display in the terminal - $arg =~ tr/[\011-\016\033\302-\365]//d; - my $bar_text = - sprintf("%d%% %d:%d=%ds %s", - $pctcomplete*100, $completed, $left, $eta, $arg); - my $terminal_width = terminal_columns(); - my $s = sprintf("%-${terminal_width}s", - substr($bar_text." "x$terminal_width, - 0,$terminal_width)); - my $width = int($terminal_width * $pctcomplete); - substr($s,$width,0) = $reset; - my $zenity = sprintf("%-${terminal_width}s", - substr("# $eta sec $arg", - 0,$terminal_width)); - $s = "\r" . $zenity . "\r" . $pctcomplete*100 . # Prefix with zenity header - "\r" . $rev . $s . $reset; - return $s; - } -} - -{ - my ($columns,$last_column_time); - - sub terminal_columns { - # Get the number of columns of the display - # Returns: - # number of columns of the screen - if(not $columns or $last_column_time < time) { - $last_column_time = time; - $columns = $ENV{'COLUMNS'}; - if(not $columns) { - my $resize = qx{ resize 2>/dev/null }; - $resize =~ /COLUMNS=(\d+);/ and do { $columns = $1; }; - } - $columns ||= 80; - } - return $columns; - } -} - -sub get_job_with_sshlogin { - # Returns: - # next job object for $sshlogin if any available - my $sshlogin = shift; - my $job = undef; - - if ($opt::hostgroups) { - my @other_hostgroup_jobs = (); - - while($job = $Global::JobQueue->get()) { - if($sshlogin->in_hostgroups($job->hostgroups())) { - # Found a job for this hostgroup - last; - } else { - # This job was not in the hostgroups of $sshlogin - push @other_hostgroup_jobs, $job; - } - } - $Global::JobQueue->unget(@other_hostgroup_jobs); - if(not defined $job) { - # No more jobs - return undef; - } - } else { - $job = $Global::JobQueue->get(); - if(not defined $job) { - # No more jobs - ::debug("start", "No more jobs: JobQueue empty\n"); - return undef; - } - } - - my $clean_command = $job->replaced(); - if($clean_command =~ /^\s*$/) { - # Do not run empty lines - if(not $Global::JobQueue->empty()) { - return get_job_with_sshlogin($sshlogin); - } else { - return undef; - } - } - $job->set_sshlogin($sshlogin); - if($opt::retries and $clean_command and - $job->failed_here()) { - # This command with these args failed for this sshlogin - my ($no_of_failed_sshlogins,$min_failures) = $job->min_failed(); - # Only look at the Global::host that have > 0 jobslots - if($no_of_failed_sshlogins == grep { $_->max_jobs_running() > 0 } values %Global::host - and $job->failed_here() == $min_failures) { - # It failed the same or more times on another host: - # run it on this host - } else { - # If it failed fewer times on another host: - # Find another job to run - my $nextjob; - if(not $Global::JobQueue->empty()) { - # This can potentially recurse for all args - no warnings 'recursion'; - $nextjob = get_job_with_sshlogin($sshlogin); - } - # Push the command back on the queue - $Global::JobQueue->unget($job); - return $nextjob; - } - } - return $job; -} - -sub __REMOTE_SSH__ {} - -sub read_sshloginfiles { - # Returns: N/A - for my $s (@_) { - read_sshloginfile(expand_slf_shorthand($s)); - } -} - -sub expand_slf_shorthand { - my $file = shift; - if($file eq "-") { - # skip: It is stdin - } elsif($file eq "..") { - $file = $ENV{'HOME'}."/.parallel/sshloginfile"; - } elsif($file eq ".") { - $file = "/etc/parallel/sshloginfile"; - } elsif(not -r $file) { - if(not -r $ENV{'HOME'}."/.parallel/".$file) { - # Try prepending ~/.parallel - ::error("Cannot open $file.\n"); - ::wait_and_exit(255); - } else { - $file = $ENV{'HOME'}."/.parallel/".$file; - } - } - return $file; -} - -sub read_sshloginfile { - # Returns: N/A - my $file = shift; - my $close = 1; - my $in_fh; - ::debug("init","--slf ",$file); - if($file eq "-") { - $in_fh = *STDIN; - $close = 0; - } else { - if(not open($in_fh, "<", $file)) { - # Try the filename - ::error("Cannot open $file.\n"); - ::wait_and_exit(255); - } - } - while(<$in_fh>) { - chomp; - /^\s*#/ and next; - /^\s*$/ and next; - push @Global::sshlogin, $_; - } - if($close) { - close $in_fh; - } -} - -sub parse_sshlogin { - # Returns: N/A - my @login; - if(not @Global::sshlogin) { @Global::sshlogin = (":"); } - for my $sshlogin (@Global::sshlogin) { - # Split up -S sshlogin,sshlogin - for my $s (split /,/, $sshlogin) { - if ($s eq ".." or $s eq "-") { - # This may add to @Global::sshlogin - possibly bug - read_sshloginfile(expand_slf_shorthand($s)); - } else { - push (@login, $s); - } - } - } - $Global::minimal_command_line_length = 8_000_000; - my @allowed_hostgroups; - for my $ncpu_sshlogin_string (::uniq(@login)) { - my $sshlogin = SSHLogin->new($ncpu_sshlogin_string); - my $sshlogin_string = $sshlogin->string(); - if($sshlogin_string eq "") { - # This is an ssh group: -S @webservers - push @allowed_hostgroups, $sshlogin->hostgroups(); - next; - } - if($Global::host{$sshlogin_string}) { - # This sshlogin has already been added: - # It is probably a host that has come back - # Set the max_jobs_running back to the original - debug("run","Already seen $sshlogin_string\n"); - if($sshlogin->{'ncpus'}) { - # If ncpus set by '#/' of the sshlogin, overwrite it: - $Global::host{$sshlogin_string}->set_ncpus($sshlogin->ncpus()); - } - $Global::host{$sshlogin_string}->set_max_jobs_running(undef); - next; - } - if($sshlogin_string eq ":") { - $sshlogin->set_maxlength(Limits::Command::max_length()); - } else { - # If all chars needs to be quoted, every other character will be \ - $sshlogin->set_maxlength(int(Limits::Command::max_length()/2)); - } - $Global::minimal_command_line_length = - ::min($Global::minimal_command_line_length, $sshlogin->maxlength()); - $Global::host{$sshlogin_string} = $sshlogin; - } - if(@allowed_hostgroups) { - # Remove hosts that are not in these groups - while (my ($string, $sshlogin) = each %Global::host) { - if(not $sshlogin->in_hostgroups(@allowed_hostgroups)) { - delete $Global::host{$string}; - } - } - } - - # debug("start", "sshlogin: ", my_dump(%Global::host),"\n"); - if($opt::transfer or @opt::return or $opt::cleanup or @opt::basefile) { - if(not remote_hosts()) { - # There are no remote hosts - if(@opt::trc) { - ::warning("--trc ignored as there are no remote --sshlogin.\n"); - } elsif (defined $opt::transfer) { - ::warning("--transfer ignored as there are no remote --sshlogin.\n"); - } elsif (@opt::return) { - ::warning("--return ignored as there are no remote --sshlogin.\n"); - } elsif (defined $opt::cleanup) { - ::warning("--cleanup ignored as there are no remote --sshlogin.\n"); - } elsif (@opt::basefile) { - ::warning("--basefile ignored as there are no remote --sshlogin.\n"); - } - } - } -} - -sub remote_hosts { - # Return sshlogins that are not ':' - # Returns: - # list of sshlogins with ':' removed - return grep !/^:$/, keys %Global::host; -} - -sub setup_basefile { - # Transfer basefiles to each $sshlogin - # This needs to be done before first jobs on $sshlogin is run - # Returns: N/A - my $cmd = ""; - my $rsync_destdir; - my $workdir; - for my $sshlogin (values %Global::host) { - if($sshlogin->string() eq ":") { next } - for my $file (@opt::basefile) { - if($file !~ m:^/: and $opt::workdir eq "...") { - ::error("Work dir '...' will not work with relative basefiles\n"); - ::wait_and_exit(255); - } - $workdir ||= Job->new("")->workdir(); - $cmd .= $sshlogin->rsync_transfer_cmd($file,$workdir) . "&"; - } - } - $cmd .= "wait;"; - debug("init", "basesetup: $cmd\n"); - print `$cmd`; -} - -sub cleanup_basefile { - # Remove the basefiles transferred - # Returns: N/A - my $cmd=""; - my $workdir = Job->new("")->workdir(); - for my $sshlogin (values %Global::host) { - if($sshlogin->string() eq ":") { next } - for my $file (@opt::basefile) { - $cmd .= $sshlogin->cleanup_cmd($file,$workdir)."&"; - } - } - $cmd .= "wait;"; - debug("init", "basecleanup: $cmd\n"); - print `$cmd`; -} - -sub filter_hosts { - my(@cores, @cpus, @maxline, @echo); - my $envvar = ::shell_quote_scalar($Global::envvar); - while (my ($host, $sshlogin) = each %Global::host) { - if($host eq ":") { next } - # The 'true' is used to get the $host out later - my $sshcmd = "true $host;" . $sshlogin->sshcommand()." ".$sshlogin->serverlogin(); - push(@cores, $host."\t".$sshcmd." ".$envvar." parallel --number-of-cores\n\0"); - push(@cpus, $host."\t".$sshcmd." ".$envvar." parallel --number-of-cpus\n\0"); - push(@maxline, $host."\t".$sshcmd." ".$envvar." parallel --max-line-length-allowed\n\0"); - # 'echo' is used to get the best possible value for an ssh login time - push(@echo, $host."\t".$sshcmd." echo\n\0"); - } - my ($fh, $tmpfile) = ::tmpfile(SUFFIX => ".ssh"); - print $fh @cores, @cpus, @maxline, @echo; - close $fh; - # --timeout 5: Setting up an SSH connection and running a simple - # command should never take > 5 sec. - # --delay 0.1: If multiple sshlogins use the same proxy the delay - # will make it less likely to overload the ssh daemon. - # --retries 3: If the ssh daemon it overloaded, try 3 times - # -s 16000: Half of the max line on UnixWare - my $cmd = "cat $tmpfile | $0 -j0 --timeout 5 -s 16000 --joblog - --plain --delay 0.1 --retries 3 --tag --tagstring {1} -0 --colsep '\t' -k eval {2} 2>/dev/null"; - ::debug("init", $cmd, "\n"); - open(my $host_fh, "-|", $cmd) || ::die_bug("parallel host check: $cmd"); - my (%ncores, %ncpus, %time_to_login, %maxlen, %echo, @down_hosts); - my $prepend = ""; - while(<$host_fh>) { - if(/\'$/) { - # if last char = ' then append next line - # This may be due to quoting of $Global::envvar - $prepend .= $_; - next; - } - $_ = $prepend . $_; - $prepend = ""; - chomp; - my @col = split /\t/, $_; - if(defined $col[6]) { - # This is a line from --joblog - # seq host time spent sent received exit signal command - # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ parallel\ --number-of-cores - if($col[0] eq "Seq" and $col[1] eq "Host" and - $col[2] eq "Starttime") { - # Header => skip - next; - } - # Get server from: eval true server\; - $col[8] =~ /eval true..([^;]+).;/ or ::die_bug("col8 does not contain host: $col[8]"); - my $host = $1; - $host =~ tr/\\//d; - $Global::host{$host} or next; - if($col[6] eq "255" or $col[7] eq "15") { - # exit == 255 or signal == 15: ssh failed - # Remove sshlogin - ::debug("init", "--filtered $host\n"); - push(@down_hosts, $host); - @down_hosts = uniq(@down_hosts); - } elsif($col[6] eq "127") { - # signal == 127: parallel not installed remote - # Set ncpus and ncores = 1 - ::warning("Could not figure out ", - "number of cpus on $host. Using 1.\n"); - $ncores{$host} = 1; - $ncpus{$host} = 1; - $maxlen{$host} = Limits::Command::max_length(); - } elsif($col[0] =~ /^\d+$/ and $Global::host{$host}) { - # Remember how log it took to log in - # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ echo - $time_to_login{$host} = ::min($time_to_login{$host},$col[3]); - } else { - ::die_bug("host check unmatched long jobline: $_"); - } - } elsif($Global::host{$col[0]}) { - # This output from --number-of-cores, --number-of-cpus, - # --max-line-length-allowed - # ncores: server 8 - # ncpus: server 2 - # maxlen: server 131071 - if(not $ncores{$col[0]}) { - $ncores{$col[0]} = $col[1]; - } elsif(not $ncpus{$col[0]}) { - $ncpus{$col[0]} = $col[1]; - } elsif(not $maxlen{$col[0]}) { - $maxlen{$col[0]} = $col[1]; - } elsif(not $echo{$col[0]}) { - $echo{$col[0]} = $col[1]; - } elsif(m/perl: warning:|LANGUAGE =|LC_ALL =|LANG =|are supported and installed/) { - # Skip these: - # perl: warning: Setting locale failed. - # perl: warning: Please check that your locale settings: - # LANGUAGE = (unset), - # LC_ALL = (unset), - # LANG = "en_US.UTF-8" - # are supported and installed on your system. - # perl: warning: Falling back to the standard locale ("C"). - } else { - ::die_bug("host check too many col0: $_"); - } - } else { - ::die_bug("host check unmatched short jobline ($col[0]): $_"); - } - } - close $host_fh; - $Global::debug or unlink $tmpfile; - delete @Global::host{@down_hosts}; - @down_hosts and ::warning("Removed @down_hosts\n"); - $Global::minimal_command_line_length = 8_000_000; - while (my ($sshlogin, $obj) = each %Global::host) { - if($sshlogin eq ":") { next } - $ncpus{$sshlogin} or ::die_bug("ncpus missing: ".$obj->serverlogin()); - $ncores{$sshlogin} or ::die_bug("ncores missing: ".$obj->serverlogin()); - $time_to_login{$sshlogin} or ::die_bug("time_to_login missing: ".$obj->serverlogin()); - $maxlen{$sshlogin} or ::die_bug("maxlen missing: ".$obj->serverlogin()); - if($opt::use_cpus_instead_of_cores) { - $obj->set_ncpus($ncpus{$sshlogin}); - } else { - $obj->set_ncpus($ncores{$sshlogin}); - } - $obj->set_time_to_login($time_to_login{$sshlogin}); - $obj->set_maxlength($maxlen{$sshlogin}); - $Global::minimal_command_line_length = - ::min($Global::minimal_command_line_length, - int($maxlen{$sshlogin}/2)); - ::debug("init", "Timing from -S:$sshlogin ncpus:",$ncpus{$sshlogin}, - " ncores:", $ncores{$sshlogin}, - " time_to_login:", $time_to_login{$sshlogin}, - " maxlen:", $maxlen{$sshlogin}, - " min_max_len:", $Global::minimal_command_line_length,"\n"); - } -} - -sub onall { - sub tmp_joblog { - my $joblog = shift; - if(not defined $joblog) { - return undef; - } - my ($fh, $tmpfile) = ::tmpfile(SUFFIX => ".log"); - close $fh; - return $tmpfile; - } - my @command = @_; - if($Global::quoting) { - @command = shell_quote_empty(@command); - } - - # Copy all @fhlist into tempfiles - my @argfiles = (); - for my $fh (@fhlist) { - my ($outfh, $name) = ::tmpfile(SUFFIX => ".all", UNLINK => 1); - print $outfh (<$fh>); - close $outfh; - push @argfiles, $name; - } - if(@opt::basefile) { setup_basefile(); } - # for each sshlogin do: - # parallel -S $sshlogin $command :::: @argfiles - # - # Pass some of the options to the sub-parallels, not all of them as - # -P should only go to the first, and -S should not be copied at all. - my $options = - join(" ", - ((defined $opt::jobs) ? "-P $opt::jobs" : ""), - ((defined $opt::linebuffer) ? "--linebuffer" : ""), - ((defined $opt::ungroup) ? "-u" : ""), - ((defined $opt::group) ? "-g" : ""), - ((defined $opt::keeporder) ? "--keeporder" : ""), - ((defined $opt::D) ? "-D $opt::D" : ""), - ((defined $opt::plain) ? "--plain" : ""), - ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""), - ); - my $suboptions = - join(" ", - ((defined $opt::ungroup) ? "-u" : ""), - ((defined $opt::linebuffer) ? "--linebuffer" : ""), - ((defined $opt::group) ? "-g" : ""), - ((defined $opt::files) ? "--files" : ""), - ((defined $opt::keeporder) ? "--keeporder" : ""), - ((defined $opt::colsep) ? "--colsep ".shell_quote($opt::colsep) : ""), - ((@opt::v) ? "-vv" : ""), - ((defined $opt::D) ? "-D $opt::D" : ""), - ((defined $opt::timeout) ? "--timeout ".$opt::timeout : ""), - ((defined $opt::plain) ? "--plain" : ""), - ((defined $opt::retries) ? "--retries ".$opt::retries : ""), - ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""), - ((defined $opt::arg_sep) ? "--arg-sep ".$opt::arg_sep : ""), - ((defined $opt::arg_file_sep) ? "--arg-file-sep ".$opt::arg_file_sep : ""), - (@opt::env ? map { "--env ".::shell_quote_scalar($_) } @opt::env : ""), - ); - ::debug("init", "| $0 $options\n"); - open(my $parallel_fh, "|-", "$0 --no-notice -j0 $options") || - ::die_bug("This does not run GNU Parallel: $0 $options"); - my @joblogs; - for my $host (sort keys %Global::host) { - my $sshlogin = $Global::host{$host}; - my $joblog = tmp_joblog($opt::joblog); - if($joblog) { - push @joblogs, $joblog; - $joblog = "--joblog $joblog"; - } - my $quad = $opt::arg_file_sep || "::::"; - ::debug("init", "$0 $suboptions -j1 $joblog ", - ((defined $opt::tag) ? - "--tagstring ".shell_quote_scalar($sshlogin->string()) : ""), - " -S ", shell_quote_scalar($sshlogin->string())," ", - join(" ",shell_quote(@command))," $quad @argfiles\n"); - print $parallel_fh "$0 $suboptions -j1 $joblog ", - ((defined $opt::tag) ? - "--tagstring ".shell_quote_scalar($sshlogin->string()) : ""), - " -S ", shell_quote_scalar($sshlogin->string())," ", - join(" ",shell_quote(@command))," $quad @argfiles\n"; - } - close $parallel_fh; - $Global::exitstatus = $? >> 8; - debug("init", "--onall exitvalue ", $?); - if(@opt::basefile) { cleanup_basefile(); } - $Global::debug or unlink(@argfiles); - my %seen; - for my $joblog (@joblogs) { - # Append to $joblog - open(my $fh, "<", $joblog) || ::die_bug("Cannot open tmp joblog $joblog"); - # Skip first line (header); - <$fh>; - print $Global::joblog (<$fh>); - close $fh; - unlink($joblog); - } -} - -sub __SIGNAL_HANDLING__ {} - -sub save_original_signal_handler { - # Remember the original signal handler - # Returns: N/A - $SIG{TERM} ||= sub { exit 0; }; # $SIG{TERM} is not set on Mac OS X - $SIG{INT} = sub { if($opt::tmux) { qx { tmux kill-session -t p$$ }; } - unlink keys %Global::unlink; exit -1 }; - $SIG{TERM} = sub { if($opt::tmux) { qx { tmux kill-session -t p$$ }; } - unlink keys %Global::unlink; exit -1 }; - %Global::original_sig = %SIG; - $SIG{TERM} = sub {}; # Dummy until jobs really start -} - -sub list_running_jobs { - # Returns: N/A - for my $v (values %Global::running) { - print $Global::original_stderr "$Global::progname: ",$v->replaced(),"\n"; - } -} - -sub start_no_new_jobs { - # Returns: N/A - $SIG{TERM} = $Global::original_sig{TERM}; - print $Global::original_stderr - ("$Global::progname: SIGTERM received. No new jobs will be started.\n", - "$Global::progname: Waiting for these ", scalar(keys %Global::running), - " jobs to finish. Send SIGTERM again to stop now.\n"); - list_running_jobs(); - $Global::start_no_new_jobs ||= 1; -} - -sub reaper { - # A job finished. - # Print the output. - # Start another job - # Returns: N/A - my $stiff; - my $children_reaped = 0; - debug("run", "Reaper "); - while (($stiff = waitpid(-1, &WNOHANG)) > 0) { - $children_reaped++; - if($Global::sshmaster{$stiff}) { - # This is one of the ssh -M: ignore - next; - } - my $job = $Global::running{$stiff}; - # '-a <(seq 10)' will give us a pid not in %Global::running - $job or next; - $job->set_exitstatus($? >> 8); - $job->set_exitsignal($? & 127); - debug("run", "died (", $job->exitstatus(), "): ", $job->seq()); - $job->set_endtime(::now()); - if($stiff == $Global::tty_taken) { - # The process that died had the tty => release it - $Global::tty_taken = 0; - } - - if(not $job->should_be_retried()) { - # The job is done - # Free the jobslot - push @Global::slots, $job->slot(); - if($opt::timeout) { - # Update average runtime for timeout - $Global::timeoutq->update_delta_time($job->runtime()); - } - # Force printing now if the job failed and we are going to exit - my $print_now = ($opt::halt_on_error and $opt::halt_on_error == 2 - and $job->exitstatus()); - if($opt::keeporder and not $print_now) { - print_earlier_jobs($job); - } else { - $job->print(); - } - if($job->exitstatus()) { - process_failed_job($job); - } - - } - my $sshlogin = $job->sshlogin(); - $sshlogin->dec_jobs_running(); - $sshlogin->inc_jobs_completed(); - $Global::total_running--; - delete $Global::running{$stiff}; - start_more_jobs(); - } - debug("run", "done "); - return $children_reaped; -} - -sub process_failed_job { - # The jobs had a exit status <> 0, so error - # Returns: N/A - my $job = shift; - $Global::exitstatus++; - $Global::total_failed++; - if($opt::halt_on_error) { - if($opt::halt_on_error == 1 - or - ($opt::halt_on_error < 1 and $Global::total_failed > 3 - and - $Global::total_failed / $Global::total_started > $opt::halt_on_error)) { - # If halt on error == 1 or --halt 10% - # we should gracefully exit - print $Global::original_stderr - ("$Global::progname: Starting no more jobs. ", - "Waiting for ", scalar(keys %Global::running), - " jobs to finish. This job failed:\n", - $job->replaced(),"\n"); - $Global::start_no_new_jobs ||= 1; - $Global::halt_on_error_exitstatus = $job->exitstatus(); - } elsif($opt::halt_on_error == 2) { - # If halt on error == 2 we should exit immediately - print $Global::original_stderr - ("$Global::progname: This job failed:\n", - $job->replaced(),"\n"); - exit ($job->exitstatus()); - } - } -} - -{ - my (%print_later,$job_end_sequence); - - sub print_earlier_jobs { - # Print jobs completed earlier - # Returns: N/A - my $job = shift; - $print_later{$job->seq()} = $job; - $job_end_sequence ||= 1; - debug("run", "Looking for: $job_end_sequence ", - "Current: ", $job->seq(), "\n"); - for(my $j = $print_later{$job_end_sequence}; - $j or vec($Global::job_already_run,$job_end_sequence,1); - $job_end_sequence++, - $j = $print_later{$job_end_sequence}) { - debug("run", "Found job end $job_end_sequence"); - if($j) { - $j->print(); - delete $print_later{$job_end_sequence}; - } - } - } -} - -sub __USAGE__ {} - -sub wait_and_exit { - # If we do not wait, we sometimes get segfault - # Returns: N/A - my $error = shift; - if($error) { - # Kill all without printing - for my $job (values %Global::running) { - $job->kill("TERM"); - $job->kill("TERM"); - } - } - for (keys %Global::unkilled_children) { - kill 9, $_; - waitpid($_,0); - delete $Global::unkilled_children{$_}; - } - wait(); - exit($error); -} - -sub die_usage { - # Returns: N/A - usage(); - wait_and_exit(255); -} - -sub usage { - # Returns: N/A - print join - ("\n", - "Usage:", - "", - "$Global::progname [options] [command [arguments]] < list_of_arguments", - "$Global::progname [options] [command [arguments]] (::: arguments|:::: argfile(s))...", - "cat ... | $Global::progname --pipe [options] [command [arguments]]", - "", - "-j n Run n jobs in parallel", - "-k Keep same order", - "-X Multiple arguments with context replace", - "--colsep regexp Split input on regexp for positional replacements", - "{} {.} {/} {/.} {#} {%} {= perl code =} Replacement strings", - "{3} {3.} {3/} {3/.} {=3 perl code =} Positional replacement strings", - "With --plus: {} = {+/}/{/} = {.}.{+.} = {+/}/{/.}.{+.} = {..}.{+..} =", - " {+/}/{/..}.{+..} = {...}.{+...} = {+/}/{/...}.{+...}", - "", - "-S sshlogin Example: foo\@server.example.com", - "--slf .. Use ~/.parallel/sshloginfile as the list of sshlogins", - "--trc {}.bar Shorthand for --transfer --return {}.bar --cleanup", - "--onall Run the given command with argument on all sshlogins", - "--nonall Run the given command with no arguments on all sshlogins", - "", - "--pipe Split stdin (standard input) to multiple jobs.", - "--recend str Record end separator for --pipe.", - "--recstart str Record start separator for --pipe.", - "", - "See 'man $Global::progname' for details", - "", - "When using programs that use GNU Parallel to process data for publication please cite:", - "", - "O. Tange (2011): GNU Parallel - The Command-Line Power Tool,", - ";login: The USENIX Magazine, February 2011:42-47.", - "", - "Or you can get GNU Parallel without this requirement by paying 10000 EUR.", - ""); -} - - -sub citation_notice { - # if --no-notice or --plain: do nothing - # if stderr redirected: do nothing - # if ~/.parallel/will-cite: do nothing - # else: print citation notice to stderr - if($opt::no_notice - or - $opt::plain - or - not -t $Global::original_stderr - or - -e $ENV{'HOME'}."/.parallel/will-cite") { - # skip - } else { - print $Global::original_stderr - ("When using programs that use GNU Parallel to process data for publication please cite:\n", - "\n", - " O. Tange (2011): GNU Parallel - The Command-Line Power Tool,\n", - " ;login: The USENIX Magazine, February 2011:42-47.\n", - "\n", - "This helps funding further development; and it won't cost you a cent.\n", - "Or you can get GNU Parallel without this requirement by paying 10000 EUR.\n", - "\n", - "To silence this citation notice run 'parallel --bibtex' once or use '--no-notice'.\n\n", - ); - flush $Global::original_stderr; - } -} - - -sub warning { - my @w = @_; - my $fh = $Global::original_stderr || *STDERR; - my $prog = $Global::progname || "parallel"; - print $fh $prog, ": Warning: ", @w; -} - - -sub error { - my @w = @_; - my $fh = $Global::original_stderr || *STDERR; - my $prog = $Global::progname || "parallel"; - print $fh $prog, ": Error: ", @w; -} - - -sub die_bug { - my $bugid = shift; - print STDERR - ("$Global::progname: This should not happen. You have found a bug.\n", - "Please contact and include:\n", - "* The version number: $Global::version\n", - "* The bugid: $bugid\n", - "* The command line being run\n", - "* The files being read (put the files on a webserver if they are big)\n", - "\n", - "If you get the error on smaller/fewer files, please include those instead.\n"); - ::wait_and_exit(255); -} - -sub version { - # Returns: N/A - if($opt::tollef and not $opt::gnu) { - print "WARNING: YOU ARE USING --tollef. IF THINGS ARE ACTING WEIRD USE --gnu.\n"; - } - print join("\n", - "GNU $Global::progname $Global::version", - "Copyright (C) 2007,2008,2009,2010,2011,2012,2013,2014 Ole Tange and Free Software Foundation, Inc.", - "License GPLv3+: GNU GPL version 3 or later ", - "This is free software: you are free to change and redistribute it.", - "GNU $Global::progname comes with no warranty.", - "", - "Web site: http://www.gnu.org/software/${Global::progname}\n", - "When using programs that use GNU Parallel to process data for publication please cite:\n", - "O. Tange (2011): GNU Parallel - The Command-Line Power Tool, ", - ";login: The USENIX Magazine, February 2011:42-47.\n", - "Or you can get GNU Parallel without this requirement by paying 10000 EUR.\n", - ); -} - -sub bibtex { - # Returns: N/A - if($opt::tollef and not $opt::gnu) { - print "WARNING: YOU ARE USING --tollef. IF THINGS ARE ACTING WEIRD USE --gnu.\n"; - } - print join("\n", - "When using programs that use GNU Parallel to process data for publication please cite:", - "", - "\@article{Tange2011a,", - " title = {GNU Parallel - The Command-Line Power Tool},", - " author = {O. Tange},", - " address = {Frederiksberg, Denmark},", - " journal = {;login: The USENIX Magazine},", - " month = {Feb},", - " number = {1},", - " volume = {36},", - " url = {http://www.gnu.org/s/parallel},", - " year = {2011},", - " pages = {42-47}", - "}", - "", - "(Feel free to use \\nocite{Tange2011a})", - "", - "This helps funding further development.", - "", - "Or you can get GNU Parallel without this requirement by paying 10000 EUR.", - "" - ); - while(not -e $ENV{'HOME'}."/.parallel/will-cite") { - print "\nType: 'will cite' and press enter.\n> "; - my $input = ; - if($input =~ /will cite/i) { - mkdir $ENV{'HOME'}."/.parallel"; - open (my $fh, ">", $ENV{'HOME'}."/.parallel/will-cite") - || ::die_bug("Cannot write: ".$ENV{'HOME'}."/.parallel/will-cite"); - close $fh; - print "\nThank you for your support. It is much appreciated. The citation\n", - "notice is now silenced.\n"; - } - } -} - -sub show_limits { - # Returns: N/A - print("Maximal size of command: ",Limits::Command::real_max_length(),"\n", - "Maximal used size of command: ",Limits::Command::max_length(),"\n", - "\n", - "Execution of will continue now, and it will try to read its input\n", - "and run commands; if this is not what you wanted to happen, please\n", - "press CTRL-D or CTRL-C\n"); -} - -sub __GENERIC_COMMON_FUNCTION__ {} - -sub uniq { - # Remove duplicates and return unique values - return keys %{{ map { $_ => 1 } @_ }}; -} - -sub min { - # Returns: - # Minimum value of array - my $min; - for (@_) { - # Skip undefs - defined $_ or next; - defined $min or do { $min = $_; next; }; # Set $_ to the first non-undef - $min = ($min < $_) ? $min : $_; - } - return $min; -} - -sub max { - # Returns: - # Maximum value of array - my $max; - for (@_) { - # Skip undefs - defined $_ or next; - defined $max or do { $max = $_; next; }; # Set $_ to the first non-undef - $max = ($max > $_) ? $max : $_; - } - return $max; -} - -sub sum { - # Returns: - # Sum of values of array - my @args = @_; - my $sum = 0; - for (@args) { - # Skip undefs - $_ and do { $sum += $_; } - } - return $sum; -} - -sub undef_as_zero { - my $a = shift; - return $a ? $a : 0; -} - -sub undef_as_empty { - my $a = shift; - return $a ? $a : ""; -} - -{ - my $hostname; - sub hostname { - if(not $hostname) { - $hostname = `hostname`; - chomp($hostname); - $hostname ||= "nohostname"; - } - return $hostname; - } -} - -sub which { - # Input: - # @programs = programs to find the path to - # Returns: - # @full_path = full paths to @programs. Nothing if not found - my @which; - for my $prg (@_) { - push @which, map { $_."/".$prg } grep { -x $_."/".$prg } split(":",$ENV{'PATH'}); - } - return @which; -} - -{ - my ($regexp,%fakename); - - sub parent_shell { - # Input: - # $pid = pid to see if (grand)*parent is a shell - # Returns: - # $shellpath = path to shell - undef if no shell found - my $pid = shift; - if(not $regexp) { - # All shells known to mankind - # - # ash bash csh dash fdsh fish fizsh ksh ksh93 mksh pdksh - # posh rbash rush rzsh sash sh static-sh tcsh yash zsh - my @shells = qw(ash bash csh dash fdsh fish fizsh ksh - ksh93 mksh pdksh posh rbash rush rzsh - sash sh static-sh tcsh yash zsh -sh -csh); - # Can be formatted as: - # [sh] -sh sh busybox sh - # /bin/sh /sbin/sh /opt/csw/sh - # NOT: foo.sh sshd crash flush pdflush scosh fsflush ssh - my $shell = "(?:".join("|",@shells).")"; - $regexp = '^((\[)('. $shell. ')(\])|(|\S+/|busybox )('. $shell. '))($| )'; - %fakename = ( - # csh and tcsh disguise themselves as -sh/-csh - "-sh" => ["csh", "tcsh"], - "-csh" => ["tcsh", "csh"], - ); - } - my ($children_of_ref, $parent_of_ref, $name_of_ref) = pid_table(); - my $shellpath; - my $testpid = $pid; - while($testpid) { - ::debug("init", "shell? ". $name_of_ref->{$testpid}."\n"); - if($name_of_ref->{$testpid} =~ /$regexp/o) { - ::debug("init", "which ".($3||$6)." => "); - $shellpath = (which($3 || $6,@{$fakename{$3 || $6}}))[0]; - ::debug("init", "shell path $shellpath\n"); - $shellpath and last; - } - $testpid = $parent_of_ref->{$testpid}; - } - return $shellpath; - } -} - -{ - my %pid_parentpid_cmd; - - sub pid_table { - # Returns: - # %children_of = { pid -> children of pid } - # %parent_of = { pid -> pid of parent } - # %name_of = { pid -> commandname } - - if(not %pid_parentpid_cmd) { - # Filter for SysV-style `ps` - my $sysv = q( ps -ef | perl -ane '1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;). - q(s/^.{$s}//; print "@F[1,2] $_"' ); - # BSD-style `ps` - my $bsd = q(ps -o pid,ppid,command -ax); - %pid_parentpid_cmd = - ( - 'aix' => $sysv, - 'cygwin' => $sysv, - 'msys' => $sysv, - 'dec_osf' => $sysv, - 'darwin' => $bsd, - 'dragonfly' => $bsd, - 'freebsd' => $bsd, - 'gnu' => $sysv, - 'hpux' => $sysv, - 'linux' => $sysv, - 'mirbsd' => $bsd, - 'netbsd' => $bsd, - 'nto' => $sysv, - 'openbsd' => $bsd, - 'solaris' => $sysv, - 'svr5' => $sysv, - ); - } - $pid_parentpid_cmd{$^O} or ::die_bug("pid_parentpid_cmd for $^O missing"); - - my (@pidtable,%parent_of,%children_of,%name_of); - # Table with pid -> children of pid - @pidtable = `$pid_parentpid_cmd{$^O}`; - my $p=$$; - for (@pidtable) { - # must match: 24436 21224 busybox ash - /(\S+)\s+(\S+)\s+(\S+.*)/ or ::die_bug("pidtable format: $_"); - $parent_of{$1} = $2; - push @{$children_of{$2}}, $1; - $name_of{$1} = $3; - } - return(\%children_of, \%parent_of, \%name_of); - } -} - -sub reap_usleep { - # Reap dead children. - # If no dead children: Sleep specified amount with exponential backoff - # Input: - # $ms = milliseconds to sleep - # Returns: - # $ms/2+0.001 if children reaped - # $ms*1.1 if no children reaped - my $ms = shift; - if(reaper()) { - # Sleep exponentially shorter (1/2^n) if a job finished - return $ms/2+0.001; - } else { - if($opt::timeout) { - $Global::timeoutq->process_timeouts(); - } - usleep($ms); - Job::exit_if_disk_full(); - if($opt::linebuffer) { - for my $job (values %Global::running) { - $job->print(); - } - } - # Sleep exponentially longer (1.1^n) if a job did not finish - # though at most 1000 ms. - return (($ms < 1000) ? ($ms * 1.1) : ($ms)); - } -} - -sub usleep { - # Sleep this many milliseconds. - # Input: - # $ms = milliseconds to sleep - my $ms = shift; - ::debug(int($ms),"ms "); - select(undef, undef, undef, $ms/1000); -} - -sub now { - # Returns time since epoch as in seconds with 3 decimals - # Uses: - # @Global::use - # Returns: - # $time = time now with millisecond accuracy - if(not $Global::use{"Time::HiRes"}) { - if(eval "use Time::HiRes qw ( time );") { - eval "sub TimeHiRestime { return Time::HiRes::time };"; - } else { - eval "sub TimeHiRestime { return time() };"; - } - $Global::use{"Time::HiRes"} = 1; - } - - return (int(TimeHiRestime()*1000))/1000; -} - -sub multiply_binary_prefix { - # Evalualte numbers with binary prefix - # Ki=2^10, Mi=2^20, Gi=2^30, Ti=2^40, Pi=2^50, Ei=2^70, Zi=2^80, Yi=2^80 - # ki=2^10, mi=2^20, gi=2^30, ti=2^40, pi=2^50, ei=2^70, zi=2^80, yi=2^80 - # K =2^10, M =2^20, G =2^30, T =2^40, P =2^50, E =2^70, Z =2^80, Y =2^80 - # k =10^3, m =10^6, g =10^9, t=10^12, p=10^15, e=10^18, z=10^21, y=10^24 - # 13G = 13*1024*1024*1024 = 13958643712 - # Input: - # $s = string with prefixes - # Returns: - # $value = int with prefixes multiplied - my $s = shift; - $s =~ s/ki/*1024/gi; - $s =~ s/mi/*1024*1024/gi; - $s =~ s/gi/*1024*1024*1024/gi; - $s =~ s/ti/*1024*1024*1024*1024/gi; - $s =~ s/pi/*1024*1024*1024*1024*1024/gi; - $s =~ s/ei/*1024*1024*1024*1024*1024*1024/gi; - $s =~ s/zi/*1024*1024*1024*1024*1024*1024*1024/gi; - $s =~ s/yi/*1024*1024*1024*1024*1024*1024*1024*1024/gi; - $s =~ s/xi/*1024*1024*1024*1024*1024*1024*1024*1024*1024/gi; - - $s =~ s/K/*1024/g; - $s =~ s/M/*1024*1024/g; - $s =~ s/G/*1024*1024*1024/g; - $s =~ s/T/*1024*1024*1024*1024/g; - $s =~ s/P/*1024*1024*1024*1024*1024/g; - $s =~ s/E/*1024*1024*1024*1024*1024*1024/g; - $s =~ s/Z/*1024*1024*1024*1024*1024*1024*1024/g; - $s =~ s/Y/*1024*1024*1024*1024*1024*1024*1024*1024/g; - $s =~ s/X/*1024*1024*1024*1024*1024*1024*1024*1024*1024/g; - - $s =~ s/k/*1000/g; - $s =~ s/m/*1000*1000/g; - $s =~ s/g/*1000*1000*1000/g; - $s =~ s/t/*1000*1000*1000*1000/g; - $s =~ s/p/*1000*1000*1000*1000*1000/g; - $s =~ s/e/*1000*1000*1000*1000*1000*1000/g; - $s =~ s/z/*1000*1000*1000*1000*1000*1000*1000/g; - $s =~ s/y/*1000*1000*1000*1000*1000*1000*1000*1000/g; - $s =~ s/x/*1000*1000*1000*1000*1000*1000*1000*1000*1000/g; - - $s = eval $s; - ::debug($s); - return $s; -} - -sub tmpfile { - # Create tempfile as $TMPDIR/parXXXXX - # Returns: - # $filename = file name created - return ::tempfile(DIR=>$ENV{'TMPDIR'}, TEMPLATE => 'parXXXXX', @_); -} - -sub __DEBUGGING__ {} - -sub debug { - # Uses: - # $Global::debug - # %Global::fd - # Returns: N/A - $Global::debug or return; - @_ = grep { defined $_ ? $_ : "" } @_; - if($Global::debug eq "all" or $Global::debug eq $_[0]) { - if($Global::fd{1}) { - # Original stdout was saved - my $stdout = $Global::fd{1}; - print $stdout @_[1..$#_]; - } else { - print @_[1..$#_]; - } - } -} - -sub my_memory_usage { - # Returns: - # memory usage if found - # 0 otherwise - use strict; - use FileHandle; - - my $pid = $$; - if(-e "/proc/$pid/stat") { - my $fh = FileHandle->new("; - chomp $data; - $fh->close; - - my @procinfo = split(/\s+/,$data); - - return undef_as_zero($procinfo[22]); - } else { - return 0; - } -} - -sub my_size { - # Returns: - # $size = size of object if Devel::Size is installed - # -1 otherwise - my @size_this = (@_); - eval "use Devel::Size qw(size total_size)"; - if ($@) { - return -1; - } else { - return total_size(@_); - } -} - -sub my_dump { - # Returns: - # ascii expression of object if Data::Dump(er) is installed - # error code otherwise - my @dump_this = (@_); - eval "use Data::Dump qw(dump);"; - if ($@) { - # Data::Dump not installed - eval "use Data::Dumper;"; - if ($@) { - my $err = "Neither Data::Dump nor Data::Dumper is installed\n". - "Not dumping output\n"; - print $Global::original_stderr $err; - return $err; - } else { - return Dumper(@dump_this); - } - } else { - # Create a dummy Data::Dump:dump as Hans Schou sometimes has - # it undefined - eval "sub Data::Dump:dump {}"; - eval "use Data::Dump qw(dump);"; - return (Data::Dump::dump(@dump_this)); - } -} - -sub my_croak { - eval "use Carp; 1"; - $Carp::Verbose = 1; - croak(@_); -} - -sub my_carp { - eval "use Carp; 1"; - $Carp::Verbose = 1; - carp(@_); -} - -sub __OBJECT_ORIENTED_PARTS__ {} - -package SSHLogin; - -sub new { - my $class = shift; - my $sshlogin_string = shift; - my $ncpus; - my %hostgroups; - # SSHLogins can have these formats: - # @grp+grp/ncpu//usr/bin/ssh user@server - # ncpu//usr/bin/ssh user@server - # /usr/bin/ssh user@server - # user@server - # ncpu/user@server - # @grp+grp/user@server - if($sshlogin_string =~ s:^\@([^/]+)/?::) { - # Look for SSHLogin hostgroups - %hostgroups = map { $_ => 1 } split(/\+/, $1); - } - if ($sshlogin_string =~ s:^(\d+)/::) { - # Override default autodetected ncpus unless missing - $ncpus = $1; - } - my $string = $sshlogin_string; - # An SSHLogin is always in the hostgroup of its $string-name - $hostgroups{$string} = 1; - @Global::hostgroups{keys %hostgroups} = values %hostgroups; - my @unget = (); - my $no_slash_string = $string; - $no_slash_string =~ s/[^-a-z0-9:]/_/gi; - return bless { - 'string' => $string, - 'jobs_running' => 0, - 'jobs_completed' => 0, - 'maxlength' => undef, - 'max_jobs_running' => undef, - 'orig_max_jobs_running' => undef, - 'ncpus' => $ncpus, - 'hostgroups' => \%hostgroups, - 'sshcommand' => undef, - 'serverlogin' => undef, - 'control_path_dir' => undef, - 'control_path' => undef, - 'time_to_login' => undef, - 'last_login_at' => undef, - 'loadavg_file' => $ENV{'HOME'} . "/.parallel/tmp/loadavg-" . - $no_slash_string, - 'loadavg' => undef, - 'last_loadavg_update' => 0, - 'swap_activity_file' => $ENV{'HOME'} . "/.parallel/tmp/swap_activity-" . - $no_slash_string, - 'swap_activity' => undef, - }, ref($class) || $class; -} - -sub DESTROY { - my $self = shift; - # Remove temporary files if they are created. - unlink $self->{'loadavg_file'}; - unlink $self->{'swap_activity_file'}; -} - -sub string { - my $self = shift; - return $self->{'string'}; -} - -sub jobs_running { - my $self = shift; - - return ($self->{'jobs_running'} || "0"); -} - -sub inc_jobs_running { - my $self = shift; - $self->{'jobs_running'}++; -} - -sub dec_jobs_running { - my $self = shift; - $self->{'jobs_running'}--; -} - -sub set_maxlength { - my $self = shift; - $self->{'maxlength'} = shift; -} - -sub maxlength { - my $self = shift; - return $self->{'maxlength'}; -} - -sub jobs_completed { - my $self = shift; - return $self->{'jobs_completed'}; -} - -sub in_hostgroups { - # Input: - # @hostgroups = the hostgroups to look for - # Returns: - # true if intersection of @hostgroups and the hostgroups of this - # SSHLogin is non-empty - my $self = shift; - return grep { defined $self->{'hostgroups'}{$_} } @_; -} - -sub hostgroups { - my $self = shift; - return keys %{$self->{'hostgroups'}}; -} - -sub inc_jobs_completed { - my $self = shift; - $self->{'jobs_completed'}++; -} - -sub set_max_jobs_running { - my $self = shift; - if(defined $self->{'max_jobs_running'}) { - $Global::max_jobs_running -= $self->{'max_jobs_running'}; - } - $self->{'max_jobs_running'} = shift; - if(defined $self->{'max_jobs_running'}) { - # max_jobs_running could be resat if -j is a changed file - $Global::max_jobs_running += $self->{'max_jobs_running'}; - } - # Initialize orig to the first non-zero value that comes around - $self->{'orig_max_jobs_running'} ||= $self->{'max_jobs_running'}; -} - -sub swapping { - my $self = shift; - my $swapping = $self->swap_activity(); - return (not defined $swapping or $swapping) -} - -sub swap_activity { - # If the currently known swap activity is too old: - # Recompute a new one in the background - # Returns: - # last swap activity computed - my $self = shift; - # Should we update the swap_activity file? - my $update_swap_activity_file = 0; - if(-r $self->{'swap_activity_file'}) { - open(my $swap_fh, "<", $self->{'swap_activity_file'}) || ::die_bug("swap_activity_file-r"); - my $swap_out = <$swap_fh>; - close $swap_fh; - if($swap_out =~ /^(\d+)$/) { - $self->{'swap_activity'} = $1; - ::debug("swap", "New swap_activity: ", $self->{'swap_activity'}); - } - ::debug("swap", "Last update: ", $self->{'last_swap_activity_update'}); - if(time - $self->{'last_swap_activity_update'} > 10) { - # last swap activity update was started 10 seconds ago - ::debug("swap", "Older than 10 sec: ", $self->{'swap_activity_file'}); - $update_swap_activity_file = 1; - } - } else { - ::debug("swap", "No swap_activity file: ", $self->{'swap_activity_file'}); - $self->{'swap_activity'} = undef; - $update_swap_activity_file = 1; - } - if($update_swap_activity_file) { - ::debug("swap", "Updating swap_activity file ", $self->{'swap_activity_file'}); - $self->{'last_swap_activity_update'} = time; - -e $ENV{'HOME'}."/.parallel" or mkdir $ENV{'HOME'}."/.parallel"; - -e $ENV{'HOME'}."/.parallel/tmp" or mkdir $ENV{'HOME'}."/.parallel/tmp"; - my $swap_activity; - $swap_activity = swapactivityscript(); - if($self->{'string'} ne ":") { - $swap_activity = $self->sshcommand() . " " . $self->serverlogin() . " " . - ::shell_quote_scalar($swap_activity); - } - # Run swap_activity measuring. - # As the command can take long to run if run remote - # save it to a tmp file before moving it to the correct file - my $file = $self->{'swap_activity_file'}; - my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".swp"); - ::debug("swap", "\n", $swap_activity, "\n"); - qx{ ($swap_activity > $tmpfile && mv $tmpfile $file || rm $tmpfile) & }; - } - return $self->{'swap_activity'}; -} - -{ - my $script; - - sub swapactivityscript { - # Returns: - # shellscript for detecting swap activity - # - # arguments for vmstat are OS dependant - # swap_in and swap_out are in different columns depending on OS - # - if(not $script) { - my %vmstat = ( - # linux: $7*$8 - # $ vmstat 1 2 - # procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu---- - # r b swpd free buff cache si so bi bo in cs us sy id wa - # 5 0 51208 1701096 198012 18857888 0 0 37 153 28 19 56 11 33 1 - # 3 0 51208 1701288 198012 18857972 0 0 0 0 3638 10412 15 3 82 0 - 'linux' => ['vmstat 1 2 | tail -n1', '$7*$8'], - - # solaris: $6*$7 - # $ vmstat -S 1 2 - # kthr memory page disk faults cpu - # r b w swap free si so pi po fr de sr s3 s4 -- -- in sy cs us sy id - # 0 0 0 4628952 3208408 0 0 3 1 1 0 0 -0 2 0 0 263 613 246 1 2 97 - # 0 0 0 4552504 3166360 0 0 0 0 0 0 0 0 0 0 0 246 213 240 1 1 98 - 'solaris' => ['vmstat -S 1 2 | tail -1', '$6*$7'], - - # darwin (macosx): $21*$22 - # $ vm_stat -c 2 1 - # Mach Virtual Memory Statistics: (page size of 4096 bytes) - # free active specul inactive throttle wired prgable faults copy 0fill reactive purged file-backed anonymous cmprssed cmprssor dcomprs comprs pageins pageout swapins swapouts - # 346306 829050 74871 606027 0 240231 90367 544858K 62343596 270837K 14178 415070 570102 939846 356 370 116 922 4019813 4 0 0 - # 345740 830383 74875 606031 0 239234 90369 2696 359 553 0 0 570110 941179 356 370 0 0 0 0 0 0 - 'darwin' => ['vm_stat -c 2 1 | tail -n1', '$21*$22'], - - # ultrix: $12*$13 - # $ vmstat -S 1 2 - # procs faults cpu memory page disk - # r b w in sy cs us sy id avm fre si so pi po fr de sr s0 - # 1 0 0 4 23 2 3 0 97 7743 217k 0 0 0 0 0 0 0 0 - # 1 0 0 6 40 8 0 1 99 7743 217k 0 0 3 0 0 0 0 0 - 'ultrix' => ['vmstat -S 1 2 | tail -1', '$12*$13'], - - # aix: $6*$7 - # $ vmstat 1 2 - # System configuration: lcpu=1 mem=2048MB - # - # kthr memory page faults cpu - # ----- ----------- ------------------------ ------------ ----------- - # r b avm fre re pi po fr sr cy in sy cs us sy id wa - # 0 0 333933 241803 0 0 0 0 0 0 10 143 90 0 0 99 0 - # 0 0 334125 241569 0 0 0 0 0 0 37 5368 184 0 9 86 5 - 'aix' => ['vmstat 1 2 | tail -n1', '$6*$7'], - - # freebsd: $8*$9 - # $ vmstat -H 1 2 - # procs memory page disks faults cpu - # r b w avm fre flt re pi po fr sr ad0 ad1 in sy cs us sy id - # 1 0 0 596716 19560 32 0 0 0 33 8 0 0 11 220 277 0 0 99 - # 0 0 0 596716 19560 2 0 0 0 0 0 0 0 11 144 263 0 1 99 - 'freebsd' => ['vmstat -H 1 2 | tail -n1', '$8*$9'], - - # mirbsd: $8*$9 - # $ vmstat 1 2 - # procs memory page disks traps cpu - # r b w avm fre flt re pi po fr sr wd0 cd0 int sys cs us sy id - # 0 0 0 25776 164968 34 0 0 0 0 0 0 0 230 259 38 4 0 96 - # 0 0 0 25776 164968 24 0 0 0 0 0 0 0 237 275 37 0 0 100 - 'mirbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'], - - # netbsd: $7*$8 - # $ vmstat 1 2 - # procs memory page disks faults cpu - # r b avm fre flt re pi po fr sr w0 w1 in sy cs us sy id - # 0 0 138452 6012 54 0 0 0 1 2 3 0 4 100 23 0 0 100 - # 0 0 138456 6008 1 0 0 0 0 0 0 0 7 26 19 0 0 100 - 'netbsd' => ['vmstat 1 2 | tail -n1', '$7*$8'], - - # openbsd: $8*$9 - # $ vmstat 1 2 - # procs memory page disks traps cpu - # r b w avm fre flt re pi po fr sr wd0 wd1 int sys cs us sy id - # 0 0 0 76596 109944 73 0 0 0 0 0 0 1 5 259 22 0 1 99 - # 0 0 0 76604 109936 24 0 0 0 0 0 0 0 7 114 20 0 1 99 - 'openbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'], - - # hpux: $8*$9 - # $ vmstat 1 2 - # procs memory page faults cpu - # r b w avm free re at pi po fr de sr in sy cs us sy id - # 1 0 0 247211 216476 4 1 0 0 0 0 0 102 73005 54 6 11 83 - # 1 0 0 247211 216421 43 9 0 0 0 0 0 144 1675 96 25269512791222387000 25269512791222387000 105 - 'hpux' => ['vmstat 1 2 | tail -n1', '$8*$9'], - - # dec_osf (tru64): $11*$12 - # $ vmstat 1 2 - # Virtual Memory Statistics: (pagesize = 8192) - # procs memory pages intr cpu - # r w u act free wire fault cow zero react pin pout in sy cs us sy id - # 3 181 36 51K 1895 8696 348M 59M 122M 259 79M 0 5 218 302 4 1 94 - # 3 181 36 51K 1893 8696 3 15 21 0 28 0 4 81 321 1 1 98 - 'dec_osf' => ['vmstat 1 2 | tail -n1', '$11*$12'], - - # gnu (hurd): $7*$8 - # $ vmstat -k 1 2 - # (pagesize: 4, size: 512288, swap size: 894972) - # free actv inact wired zeroed react pgins pgouts pfaults cowpfs hrat caobj cache swfree - # 371940 30844 89228 20276 298348 0 48192 19016 756105 99808 98% 876 20628 894972 - # 371940 30844 89228 20276 +0 +0 +0 +0 +42 +2 98% 876 20628 894972 - 'gnu' => ['vmstat -k 1 2 | tail -n1', '$7*$8'], - - # -nto (qnx has no swap) - #-irix - #-svr5 (scosysv) - ); - my $perlscript = ""; - for my $os (keys %vmstat) { - #q[ { vmstat 1 2 2> /dev/null || vmstat -c 1 2; } | ]. - # q[ awk 'NR!=4{next} NF==17||NF==16{print $7*$8} NF==22{print $21*$22} {exit}' ]; - $vmstat{$os}[1] =~ s/\$/\\\\\\\$/g; # $ => \\\$ - $perlscript .= 'if($^O eq "'.$os.'") { print `'.$vmstat{$os}[0].' | awk "{print ' . - $vmstat{$os}[1] . '}"` }'; - } - $perlscript = "perl -e " . ::shell_quote_scalar($perlscript); - $script = $Global::envvar. " " .$perlscript; - } - return $script; - } -} - -sub too_fast_remote_login { - my $self = shift; - if($self->{'last_login_at'} and $self->{'time_to_login'}) { - # sshd normally allows 10 simultaneous logins - # A login takes time_to_login - # So time_to_login/5 should be safe - # If now <= last_login + time_to_login/5: Then it is too soon. - my $too_fast = (::now() <= $self->{'last_login_at'} - + $self->{'time_to_login'}/5); - ::debug("run", "Too fast? $too_fast "); - return $too_fast; - } else { - # No logins so far (or time_to_login not computed): it is not too fast - return 0; - } -} - -sub last_login_at { - my $self = shift; - return $self->{'last_login_at'}; -} - -sub set_last_login_at { - my $self = shift; - $self->{'last_login_at'} = shift; -} - -sub loadavg_too_high { - my $self = shift; - my $loadavg = $self->loadavg(); - return (not defined $loadavg or - $loadavg > $self->max_loadavg()); -} - -sub loadavg { - # If the currently know loadavg is too old: - # Recompute a new one in the background - # The load average is computed as the number of processes waiting for disk - # or CPU right now. So it is the server load this instant and not averaged over - # several minutes. This is needed so GNU Parallel will at most start one job - # that will push the load over the limit. - # - # Returns: - # $last_loadavg = last load average computed (undef if none) - my $self = shift; - # Should we update the loadavg file? - my $update_loadavg_file = 0; - if(open(my $load_fh, "<", $self->{'loadavg_file'})) { - local $/ = undef; - my $load_out = <$load_fh>; - close $load_fh; - my $load =()= ($load_out=~/(^[DR]....[^\[])/gm); - if($load > 0) { - # load is overestimated by 1 - $self->{'loadavg'} = $load - 1; - ::debug("load", "New loadavg: ", $self->{'loadavg'}); - } else { - ::die_bug("loadavg_invalid_content: $load_out"); - } - ::debug("load", "Last update: ", $self->{'last_loadavg_update'}); - if(time - $self->{'last_loadavg_update'} > 10) { - # last loadavg was started 10 seconds ago - ::debug("load", time - $self->{'last_loadavg_update'}, " secs old: ", - $self->{'loadavg_file'}); - $update_loadavg_file = 1; - } - } else { - ::debug("load", "No loadavg file: ", $self->{'loadavg_file'}); - $self->{'loadavg'} = undef; - $update_loadavg_file = 1; - } - if($update_loadavg_file) { - ::debug("load", "Updating loadavg file", $self->{'loadavg_file'}, "\n"); - $self->{'last_loadavg_update'} = time; - -e $ENV{'HOME'}."/.parallel" or mkdir $ENV{'HOME'}."/.parallel"; - -e $ENV{'HOME'}."/.parallel/tmp" or mkdir $ENV{'HOME'}."/.parallel/tmp"; - my $cmd = ""; - if($self->{'string'} ne ":") { - $cmd = $self->sshcommand() . " " . $self->serverlogin() . " "; - } - # TODO Is is called 'ps ax -o state,command' on other platforms? - $cmd .= "ps ax -o state,command"; - # As the command can take long to run if run remote - # save it to a tmp file before moving it to the correct file - my $file = $self->{'loadavg_file'}; - my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".loa"); - qx{ ($cmd > $tmpfile && mv $tmpfile $file || rm $tmpfile) & }; - } - return $self->{'loadavg'}; -} - -sub max_loadavg { - my $self = shift; - # If --load is a file it might be changed - if($Global::max_load_file) { - my $mtime = (stat($Global::max_load_file))[9]; - if($mtime > $Global::max_load_file_last_mod) { - $Global::max_load_file_last_mod = $mtime; - for my $sshlogin (values %Global::host) { - $sshlogin->set_max_loadavg(undef); - } - } - } - if(not defined $self->{'max_loadavg'}) { - $self->{'max_loadavg'} = - $self->compute_max_loadavg($opt::load); - } - ::debug("load", "max_loadavg: ", $self->string(), " ", $self->{'max_loadavg'}); - return $self->{'max_loadavg'}; -} - -sub set_max_loadavg { - my $self = shift; - $self->{'max_loadavg'} = shift; -} - -sub compute_max_loadavg { - # Parse the max loadaverage that the user asked for using --load - # Returns: - # max loadaverage - my $self = shift; - my $loadspec = shift; - my $load; - if(defined $loadspec) { - if($loadspec =~ /^\+(\d+)$/) { - # E.g. --load +2 - my $j = $1; - $load = - $self->ncpus() + $j; - } elsif ($loadspec =~ /^-(\d+)$/) { - # E.g. --load -2 - my $j = $1; - $load = - $self->ncpus() - $j; - } elsif ($loadspec =~ /^(\d+)\%$/) { - my $j = $1; - $load = - $self->ncpus() * $j / 100; - } elsif ($loadspec =~ /^(\d+(\.\d+)?)$/) { - $load = $1; - } elsif (-f $loadspec) { - $Global::max_load_file = $loadspec; - $Global::max_load_file_last_mod = (stat($Global::max_load_file))[9]; - if(open(my $in_fh, "<", $Global::max_load_file)) { - my $opt_load_file = join("",<$in_fh>); - close $in_fh; - $load = $self->compute_max_loadavg($opt_load_file); - } else { - print $Global::original_stderr "Cannot open $loadspec\n"; - ::wait_and_exit(255); - } - } else { - print $Global::original_stderr "Parsing of --load failed\n"; - ::die_usage(); - } - if($load < 0.01) { - $load = 0.01; - } - } - return $load; -} - -sub time_to_login { - my $self = shift; - return $self->{'time_to_login'}; -} - -sub set_time_to_login { - my $self = shift; - $self->{'time_to_login'} = shift; -} - -sub max_jobs_running { - my $self = shift; - if(not defined $self->{'max_jobs_running'}) { - my $nproc = $self->compute_number_of_processes($opt::jobs); - $self->set_max_jobs_running($nproc); - } - return $self->{'max_jobs_running'}; -} - -sub orig_max_jobs_running { - my $self = shift; - return $self->{'orig_max_jobs_running'}; -} - -sub compute_number_of_processes { - # Number of processes wanted and limited by system resources - # Returns: - # Number of processes - my $self = shift; - my $opt_P = shift; - my $wanted_processes = $self->user_requested_processes($opt_P); - if(not defined $wanted_processes) { - $wanted_processes = $Global::default_simultaneous_sshlogins; - } - ::debug("load", "Wanted procs: $wanted_processes\n"); - my $system_limit = - $self->processes_available_by_system_limit($wanted_processes); - ::debug("load", "Limited to procs: $system_limit\n"); - return $system_limit; -} - -sub processes_available_by_system_limit { - # If the wanted number of processes is bigger than the system limits: - # Limit them to the system limits - # Limits are: File handles, number of input lines, processes, - # and taking > 1 second to spawn 10 extra processes - # Returns: - # Number of processes - my $self = shift; - my $wanted_processes = shift; - - my $system_limit = 0; - my @jobs = (); - my $job; - my @args = (); - my $arg; - my $more_filehandles = 1; - my $max_system_proc_reached = 0; - my $slow_spawining_warning_printed = 0; - my $time = time; - my %fh; - my @children; - - # Reserve filehandles - # perl uses 7 filehandles for something? - # parallel uses 1 for memory_usage - # parallel uses 4 for ? - for my $i (1..12) { - open($fh{"init-$i"}, "<", "/dev/null"); - } - - for(1..2) { - # System process limit - my $child; - if($child = fork()) { - push (@children,$child); - $Global::unkilled_children{$child} = 1; - } elsif(defined $child) { - # The child takes one process slot - # It will be killed later - $SIG{TERM} = $Global::original_sig{TERM}; - sleep 10000000; - exit(0); - } else { - $max_system_proc_reached = 1; - } - } - my $count_jobs_already_read = $Global::JobQueue->next_seq(); - my $wait_time_for_getting_args = 0; - my $start_time = time; - while(1) { - $system_limit >= $wanted_processes and last; - not $more_filehandles and last; - $max_system_proc_reached and last; - my $before_getting_arg = time; - if($Global::semaphore or $opt::pipe) { - # Skip: No need to get args - } elsif(defined $opt::retries and $count_jobs_already_read) { - # For retries we may need to run all jobs on this sshlogin - # so include the already read jobs for this sshlogin - $count_jobs_already_read--; - } else { - if($opt::X or $opt::m) { - # The arguments may have to be re-spread over several jobslots - # So pessimistically only read one arg per jobslot - # instead of a full commandline - if($Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->empty()) { - if($Global::JobQueue->empty()) { - last; - } else { - ($job) = $Global::JobQueue->get(); - push(@jobs, $job); - } - } else { - ($arg) = $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get(); - push(@args, $arg); - } - } else { - # If there are no more command lines, then we have a process - # per command line, so no need to go further - $Global::JobQueue->empty() and last; - ($job) = $Global::JobQueue->get(); - push(@jobs, $job); - } - } - $wait_time_for_getting_args += time - $before_getting_arg; - $system_limit++; - - # Every simultaneous process uses 2 filehandles when grouping - # Every simultaneous process uses 2 filehandles when compressing - $more_filehandles = open($fh{$system_limit*10}, "<", "/dev/null") - && open($fh{$system_limit*10+2}, "<", "/dev/null") - && open($fh{$system_limit*10+3}, "<", "/dev/null") - && open($fh{$system_limit*10+4}, "<", "/dev/null"); - - # System process limit - my $child; - if($child = fork()) { - push (@children,$child); - $Global::unkilled_children{$child} = 1; - } elsif(defined $child) { - # The child takes one process slot - # It will be killed later - $SIG{TERM} = $Global::original_sig{TERM}; - sleep 10000000; - exit(0); - } else { - $max_system_proc_reached = 1; - } - my $forktime = time - $time - $wait_time_for_getting_args; - ::debug("run", "Time to fork $system_limit procs: $wait_time_for_getting_args ", - $forktime, - " (processes so far: ", $system_limit,")\n"); - if($system_limit > 10 and - $forktime > 1 and - $forktime > $system_limit * 0.01 - and not $slow_spawining_warning_printed) { - # It took more than 0.01 second to fork a processes on avg. - # Give the user a warning. He can press Ctrl-C if this - # sucks. - print $Global::original_stderr - ("parallel: Warning: Starting $system_limit processes took > $forktime sec.\n", - "Consider adjusting -j. Press CTRL-C to stop.\n"); - $slow_spawining_warning_printed = 1; - } - } - # Cleanup: Close the files - for (values %fh) { close $_ } - # Cleanup: Kill the children - for my $pid (@children) { - kill 9, $pid; - waitpid($pid,0); - delete $Global::unkilled_children{$pid}; - } - # Cleanup: Unget the command_lines or the @args - $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->unget(@args); - $Global::JobQueue->unget(@jobs); - if($system_limit < $wanted_processes) { - # The system_limit is less than the wanted_processes - if($system_limit < 1 and not $Global::JobQueue->empty()) { - ::warning("Cannot spawn any jobs. Raising ulimit -u or /etc/security/limits.conf\n", - "or /proc/sys/kernel/pid_max may help.\n"); - ::wait_and_exit(255); - } - if(not $more_filehandles) { - ::warning("Only enough file handles to run ", $system_limit, " jobs in parallel.\n", - "Running 'parallel -j0 -N", $system_limit, " --pipe parallel -j0' or ", - "raising ulimit -n or /etc/security/limits.conf may help.\n"); - } - if($max_system_proc_reached) { - ::warning("Only enough available processes to run ", $system_limit, - " jobs in parallel. Raising ulimit -u or /etc/security/limits.conf\n", - "or /proc/sys/kernel/pid_max may help.\n"); - } - } - if($] == 5.008008 and $system_limit > 1000) { - # https://savannah.gnu.org/bugs/?36942 - $system_limit = 1000; - } - if($Global::JobQueue->empty()) { - $system_limit ||= 1; - } - if($self->string() ne ":" and - $system_limit > $Global::default_simultaneous_sshlogins) { - $system_limit = - $self->simultaneous_sshlogin_limit($system_limit); - } - return $system_limit; -} - -sub simultaneous_sshlogin_limit { - # Test by logging in wanted number of times simultaneously - # Returns: - # min($wanted_processes,$working_simultaneous_ssh_logins-1) - my $self = shift; - my $wanted_processes = shift; - if($self->{'time_to_login'}) { - return $wanted_processes; - } - - # Try twice because it guesses wrong sometimes - # Choose the minimal - my $ssh_limit = - ::min($self->simultaneous_sshlogin($wanted_processes), - $self->simultaneous_sshlogin($wanted_processes)); - if($ssh_limit < $wanted_processes) { - my $serverlogin = $self->serverlogin(); - ::warning("ssh to $serverlogin only allows ", - "for $ssh_limit simultaneous logins.\n", - "You may raise this by changing ", - "/etc/ssh/sshd_config:MaxStartups and MaxSessions on $serverlogin.\n", - "Using only ",$ssh_limit-1," connections ", - "to avoid race conditions.\n"); - } - # Race condition can cause problem if using all sshs. - if($ssh_limit > 1) { $ssh_limit -= 1; } - return $ssh_limit; -} - -sub simultaneous_sshlogin { - # Using $sshlogin try to see if we can do $wanted_processes - # simultaneous logins - # (ssh host echo simultaneouslogin & ssh host echo simultaneouslogin & ...)|grep simul|wc -l - # Returns: - # Number of succesful logins - my $self = shift; - my $wanted_processes = shift; - my $sshcmd = $self->sshcommand(); - my $serverlogin = $self->serverlogin(); - my $sshdelay = $opt::sshdelay ? "sleep $opt::sshdelay;" : ""; - my $cmd = "$sshdelay$sshcmd $serverlogin echo simultaneouslogin &1 &"x$wanted_processes; - ::debug("init", "Trying $wanted_processes logins at $serverlogin\n"); - open (my $simul_fh, "-|", "($cmd)|grep simultaneouslogin | wc -l") or - ::die_bug("simultaneouslogin"); - my $ssh_limit = <$simul_fh>; - close $simul_fh; - chomp $ssh_limit; - return $ssh_limit; -} - -sub set_ncpus { - my $self = shift; - $self->{'ncpus'} = shift; -} - -sub user_requested_processes { - # Parse the number of processes that the user asked for using -j - # Returns: - # the number of processes to run on this sshlogin - my $self = shift; - my $opt_P = shift; - my $processes; - if(defined $opt_P) { - if($opt_P =~ /^\+(\d+)$/) { - # E.g. -P +2 - my $j = $1; - $processes = - $self->ncpus() + $j; - } elsif ($opt_P =~ /^-(\d+)$/) { - # E.g. -P -2 - my $j = $1; - $processes = - $self->ncpus() - $j; - } elsif ($opt_P =~ /^(\d+(\.\d+)?)\%$/) { - # E.g. -P 10.5% - my $j = $1; - $processes = - $self->ncpus() * $j / 100; - } elsif ($opt_P =~ /^(\d+)$/) { - $processes = $1; - if($processes == 0) { - # -P 0 = infinity (or at least close) - $processes = $Global::infinity; - } - } elsif (-f $opt_P) { - $Global::max_procs_file = $opt_P; - $Global::max_procs_file_last_mod = (stat($Global::max_procs_file))[9]; - if(open(my $in_fh, "<", $Global::max_procs_file)) { - my $opt_P_file = join("",<$in_fh>); - close $in_fh; - $processes = $self->user_requested_processes($opt_P_file); - } else { - ::error("Cannot open $opt_P.\n"); - ::wait_and_exit(255); - } - } else { - ::error("Parsing of --jobs/-j/--max-procs/-P failed.\n"); - ::die_usage(); - } - $processes = ::ceil($processes); - } - return $processes; -} - -sub ncpus { - my $self = shift; - if(not defined $self->{'ncpus'}) { - my $sshcmd = $self->sshcommand(); - my $serverlogin = $self->serverlogin(); - if($serverlogin eq ":") { - if($opt::use_cpus_instead_of_cores) { - $self->{'ncpus'} = no_of_cpus(); - } else { - $self->{'ncpus'} = no_of_cores(); - } - } else { - my $ncpu; - my $sqe = ::shell_quote_scalar($Global::envvar); - if($opt::use_cpus_instead_of_cores) { - $ncpu = qx(echo|$sshcmd $serverlogin $sqe parallel --number-of-cpus); - } else { - ::debug("init",qq(echo|$sshcmd $serverlogin $sqe parallel --number-of-cores\n)); - $ncpu = qx(echo|$sshcmd $serverlogin $sqe parallel --number-of-cores); - } - chomp $ncpu; - if($ncpu =~ /^\s*[0-9]+\s*$/s) { - $self->{'ncpus'} = $ncpu; - } else { - ::warning("Could not figure out ", - "number of cpus on $serverlogin ($ncpu). Using 1.\n"); - $self->{'ncpus'} = 1; - } - } - } - return $self->{'ncpus'}; -} - -sub no_of_cpus { - # Returns: - # Number of physical CPUs - local $/="\n"; # If delimiter is set, then $/ will be wrong - my $no_of_cpus; - if ($^O eq 'linux') { - $no_of_cpus = no_of_cpus_gnu_linux() || no_of_cores_gnu_linux(); - } elsif ($^O eq 'freebsd') { - $no_of_cpus = no_of_cpus_freebsd(); - } elsif ($^O eq 'netbsd') { - $no_of_cpus = no_of_cpus_netbsd(); - } elsif ($^O eq 'openbsd') { - $no_of_cpus = no_of_cpus_openbsd(); - } elsif ($^O eq 'gnu') { - $no_of_cpus = no_of_cpus_hurd(); - } elsif ($^O eq 'darwin') { - $no_of_cpus = no_of_cpus_darwin(); - } elsif ($^O eq 'solaris') { - $no_of_cpus = no_of_cpus_solaris(); - } elsif ($^O eq 'aix') { - $no_of_cpus = no_of_cpus_aix(); - } elsif ($^O eq 'hpux') { - $no_of_cpus = no_of_cpus_hpux(); - } elsif ($^O eq 'nto') { - $no_of_cpus = no_of_cpus_qnx(); - } elsif ($^O eq 'svr5') { - $no_of_cpus = no_of_cpus_openserver(); - } elsif ($^O eq 'irix') { - $no_of_cpus = no_of_cpus_irix(); - } elsif ($^O eq 'dec_osf') { - $no_of_cpus = no_of_cpus_tru64(); - } else { - $no_of_cpus = (no_of_cpus_gnu_linux() - || no_of_cpus_freebsd() - || no_of_cpus_netbsd() - || no_of_cpus_openbsd() - || no_of_cpus_hurd() - || no_of_cpus_darwin() - || no_of_cpus_solaris() - || no_of_cpus_aix() - || no_of_cpus_hpux() - || no_of_cpus_qnx() - || no_of_cpus_openserver() - || no_of_cpus_irix() - || no_of_cpus_tru64() - # Number of cores is better than no guess for #CPUs - || nproc() - ); - } - if($no_of_cpus) { - chomp $no_of_cpus; - return $no_of_cpus; - } else { - ::warning("Cannot figure out number of cpus. Using 1.\n"); - return 1; - } -} - -sub no_of_cores { - # Returns: - # Number of CPU cores - local $/="\n"; # If delimiter is set, then $/ will be wrong - my $no_of_cores; - if ($^O eq 'linux') { - $no_of_cores = no_of_cores_gnu_linux(); - } elsif ($^O eq 'freebsd') { - $no_of_cores = no_of_cores_freebsd(); - } elsif ($^O eq 'netbsd') { - $no_of_cores = no_of_cores_netbsd(); - } elsif ($^O eq 'openbsd') { - $no_of_cores = no_of_cores_openbsd(); - } elsif ($^O eq 'gnu') { - $no_of_cores = no_of_cores_hurd(); - } elsif ($^O eq 'darwin') { - $no_of_cores = no_of_cores_darwin(); - } elsif ($^O eq 'solaris') { - $no_of_cores = no_of_cores_solaris(); - } elsif ($^O eq 'aix') { - $no_of_cores = no_of_cores_aix(); - } elsif ($^O eq 'hpux') { - $no_of_cores = no_of_cores_hpux(); - } elsif ($^O eq 'nto') { - $no_of_cores = no_of_cores_qnx(); - } elsif ($^O eq 'svr5') { - $no_of_cores = no_of_cores_openserver(); - } elsif ($^O eq 'irix') { - $no_of_cores = no_of_cores_irix(); - } elsif ($^O eq 'dec_osf') { - $no_of_cores = no_of_cores_tru64(); - } else { - $no_of_cores = (no_of_cores_gnu_linux() - || no_of_cores_freebsd() - || no_of_cores_netbsd() - || no_of_cores_openbsd() - || no_of_cores_hurd() - || no_of_cores_darwin() - || no_of_cores_solaris() - || no_of_cores_aix() - || no_of_cores_hpux() - || no_of_cores_qnx() - || no_of_cores_openserver() - || no_of_cores_irix() - || no_of_cores_tru64() - || nproc() - ); - } - if($no_of_cores) { - chomp $no_of_cores; - return $no_of_cores; - } else { - ::warning("Cannot figure out number of CPU cores. Using 1.\n"); - return 1; - } -} - -sub nproc { - # Returns: - # Number of cores using `nproc` - my $no_of_cores = `nproc 2>/dev/null`; - return $no_of_cores; -} - -sub no_of_cpus_gnu_linux { - # Returns: - # Number of physical CPUs on GNU/Linux - # undef if not GNU/Linux - my $no_of_cpus; - my $no_of_cores; - if(-e "/proc/cpuinfo") { - $no_of_cpus = 0; - $no_of_cores = 0; - my %seen; - open(my $in_fh, "<", "/proc/cpuinfo") || return undef; - while(<$in_fh>) { - if(/^physical id.*[:](.*)/ and not $seen{$1}++) { - $no_of_cpus++; - } - /^processor.*[:]/i and $no_of_cores++; - } - close $in_fh; - } - return ($no_of_cpus||$no_of_cores); -} - -sub no_of_cores_gnu_linux { - # Returns: - # Number of CPU cores on GNU/Linux - # undef if not GNU/Linux - my $no_of_cores; - if(-e "/proc/cpuinfo") { - $no_of_cores = 0; - open(my $in_fh, "<", "/proc/cpuinfo") || return undef; - while(<$in_fh>) { - /^processor.*[:]/i and $no_of_cores++; - } - close $in_fh; - } - return $no_of_cores; -} - -sub no_of_cpus_freebsd { - # Returns: - # Number of physical CPUs on FreeBSD - # undef if not FreeBSD - my $no_of_cpus = - (`sysctl -a dev.cpu 2>/dev/null | grep \%parent | awk '{ print \$2 }' | uniq | wc -l | awk '{ print \$1 }'` - or - `sysctl hw.ncpu 2>/dev/null | awk '{ print \$2 }'`); - chomp $no_of_cpus; - return $no_of_cpus; -} - -sub no_of_cores_freebsd { - # Returns: - # Number of CPU cores on FreeBSD - # undef if not FreeBSD - my $no_of_cores = - (`sysctl hw.ncpu 2>/dev/null | awk '{ print \$2 }'` - or - `sysctl -a hw 2>/dev/null | grep [^a-z]logicalcpu[^a-z] | awk '{ print \$2 }'`); - chomp $no_of_cores; - return $no_of_cores; -} - -sub no_of_cpus_netbsd { - # Returns: - # Number of physical CPUs on NetBSD - # undef if not NetBSD - my $no_of_cpus = `sysctl -n hw.ncpu 2>/dev/null`; - chomp $no_of_cpus; - return $no_of_cpus; -} - -sub no_of_cores_netbsd { - # Returns: - # Number of CPU cores on NetBSD - # undef if not NetBSD - my $no_of_cores = `sysctl -n hw.ncpu 2>/dev/null`; - chomp $no_of_cores; - return $no_of_cores; -} - -sub no_of_cpus_openbsd { - # Returns: - # Number of physical CPUs on OpenBSD - # undef if not OpenBSD - my $no_of_cpus = `sysctl -n hw.ncpu 2>/dev/null`; - chomp $no_of_cpus; - return $no_of_cpus; -} - -sub no_of_cores_openbsd { - # Returns: - # Number of CPU cores on OpenBSD - # undef if not OpenBSD - my $no_of_cores = `sysctl -n hw.ncpu 2>/dev/null`; - chomp $no_of_cores; - return $no_of_cores; -} - -sub no_of_cpus_hurd { - # Returns: - # Number of physical CPUs on HURD - # undef if not HURD - my $no_of_cpus = `nproc`; - chomp $no_of_cpus; - return $no_of_cpus; -} - -sub no_of_cores_hurd { - # Returns: - # Number of physical CPUs on HURD - # undef if not HURD - my $no_of_cores = `nproc`; - chomp $no_of_cores; - return $no_of_cores; -} - -sub no_of_cpus_darwin { - # Returns: - # Number of physical CPUs on Mac Darwin - # undef if not Mac Darwin - my $no_of_cpus = - (`sysctl -n hw.physicalcpu 2>/dev/null` - or - `sysctl -a hw 2>/dev/null | grep [^a-z]physicalcpu[^a-z] | awk '{ print \$2 }'`); - return $no_of_cpus; -} - -sub no_of_cores_darwin { - # Returns: - # Number of CPU cores on Mac Darwin - # undef if not Mac Darwin - my $no_of_cores = - (`sysctl -n hw.logicalcpu 2>/dev/null` - or - `sysctl -a hw 2>/dev/null | grep [^a-z]logicalcpu[^a-z] | awk '{ print \$2 }'`); - return $no_of_cores; -} - -sub no_of_cpus_solaris { - # Returns: - # Number of physical CPUs on Solaris - # undef if not Solaris - if(-x "/usr/sbin/psrinfo") { - my @psrinfo = `/usr/sbin/psrinfo`; - if($#psrinfo >= 0) { - return $#psrinfo +1; - } - } - if(-x "/usr/sbin/prtconf") { - my @prtconf = `/usr/sbin/prtconf | grep cpu..instance`; - if($#prtconf >= 0) { - return $#prtconf +1; - } - } - return undef; -} - -sub no_of_cores_solaris { - # Returns: - # Number of CPU cores on Solaris - # undef if not Solaris - if(-x "/usr/sbin/psrinfo") { - my @psrinfo = `/usr/sbin/psrinfo`; - if($#psrinfo >= 0) { - return $#psrinfo +1; - } - } - if(-x "/usr/sbin/prtconf") { - my @prtconf = `/usr/sbin/prtconf | grep cpu..instance`; - if($#prtconf >= 0) { - return $#prtconf +1; - } - } - return undef; -} - -sub no_of_cpus_aix { - # Returns: - # Number of physical CPUs on AIX - # undef if not AIX - my $no_of_cpus = 0; - if(-x "/usr/sbin/lscfg") { - open(my $in_fh, "-|", "/usr/sbin/lscfg -vs |grep proc | wc -l|tr -d ' '") - || return undef; - $no_of_cpus = <$in_fh>; - chomp ($no_of_cpus); - close $in_fh; - } - return $no_of_cpus; -} - -sub no_of_cores_aix { - # Returns: - # Number of CPU cores on AIX - # undef if not AIX - my $no_of_cores; - if(-x "/usr/bin/vmstat") { - open(my $in_fh, "-|", "/usr/bin/vmstat 1 1") || return undef; - while(<$in_fh>) { - /lcpu=([0-9]*) / and $no_of_cores = $1; - } - close $in_fh; - } - return $no_of_cores; -} - -sub no_of_cpus_hpux { - # Returns: - # Number of physical CPUs on HP-UX - # undef if not HP-UX - my $no_of_cpus = - (`/usr/bin/mpsched -s 2>&1 | grep 'Locality Domain Count' | awk '{ print \$4 }'`); - return $no_of_cpus; -} - -sub no_of_cores_hpux { - # Returns: - # Number of CPU cores on HP-UX - # undef if not HP-UX - my $no_of_cores = - (`/usr/bin/mpsched -s 2>&1 | grep 'Processor Count' | awk '{ print \$3 }'`); - return $no_of_cores; -} - -sub no_of_cpus_qnx { - # Returns: - # Number of physical CPUs on QNX - # undef if not QNX - # BUG: It is now known how to calculate this. - my $no_of_cpus = 0; - return $no_of_cpus; -} - -sub no_of_cores_qnx { - # Returns: - # Number of CPU cores on QNX - # undef if not QNX - # BUG: It is now known how to calculate this. - my $no_of_cores = 0; - return $no_of_cores; -} - -sub no_of_cpus_openserver { - # Returns: - # Number of physical CPUs on SCO OpenServer - # undef if not SCO OpenServer - my $no_of_cpus = 0; - if(-x "/usr/sbin/psrinfo") { - my @psrinfo = `/usr/sbin/psrinfo`; - if($#psrinfo >= 0) { - return $#psrinfo +1; - } - } - return $no_of_cpus; -} - -sub no_of_cores_openserver { - # Returns: - # Number of CPU cores on SCO OpenServer - # undef if not SCO OpenServer - my $no_of_cores = 0; - if(-x "/usr/sbin/psrinfo") { - my @psrinfo = `/usr/sbin/psrinfo`; - if($#psrinfo >= 0) { - return $#psrinfo +1; - } - } - return $no_of_cores; -} - -sub no_of_cpus_irix { - # Returns: - # Number of physical CPUs on IRIX - # undef if not IRIX - my $no_of_cpus = `hinv | grep HZ | grep Processor | awk '{print \$1}'`; - return $no_of_cpus; -} - -sub no_of_cores_irix { - # Returns: - # Number of CPU cores on IRIX - # undef if not IRIX - my $no_of_cores = `hinv | grep HZ | grep Processor | awk '{print \$1}'`; - return $no_of_cores; -} - -sub no_of_cpus_tru64 { - # Returns: - # Number of physical CPUs on Tru64 - # undef if not Tru64 - my $no_of_cpus = `sizer -pr`; - return $no_of_cpus; -} - -sub no_of_cores_tru64 { - # Returns: - # Number of CPU cores on Tru64 - # undef if not Tru64 - my $no_of_cores = `sizer -pr`; - return $no_of_cores; -} - -sub sshcommand { - my $self = shift; - if (not defined $self->{'sshcommand'}) { - $self->sshcommand_of_sshlogin(); - } - return $self->{'sshcommand'}; -} - -sub serverlogin { - my $self = shift; - if (not defined $self->{'serverlogin'}) { - $self->sshcommand_of_sshlogin(); - } - return $self->{'serverlogin'}; -} - -sub sshcommand_of_sshlogin { - # 'server' -> ('ssh -S /tmp/parallel-ssh-RANDOM/host-','server') - # 'user@server' -> ('ssh','user@server') - # 'myssh user@server' -> ('myssh','user@server') - # 'myssh -l user server' -> ('myssh -l user','server') - # '/usr/bin/myssh -l user server' -> ('/usr/bin/myssh -l user','server') - # Returns: - # sshcommand - defaults to 'ssh' - # login@host - my $self = shift; - my ($sshcmd, $serverlogin); - if($self->{'string'} =~ /(.+) (\S+)$/) { - # Own ssh command - $sshcmd = $1; $serverlogin = $2; - } else { - # Normal ssh - if($opt::controlmaster) { - # Use control_path to make ssh faster - my $control_path = $self->control_path_dir()."/ssh-%r@%h:%p"; - $sshcmd = "ssh -S ".$control_path; - $serverlogin = $self->{'string'}; - if(not $self->{'control_path'}{$control_path}++) { - # Master is not running for this control_path - # Start it - my $pid = fork(); - if($pid) { - $Global::sshmaster{$pid} ||= 1; - } else { - $SIG{'TERM'} = undef; - # Ignore the 'foo' being printed - open(STDOUT,">","/dev/null"); - # OpenSSH_3.6.1p2 gives 'tcgetattr: Invalid argument' with -tt - # STDERR >/dev/null to ignore "process_mux_new_session: tcgetattr: Invalid argument" - open(STDERR,">","/dev/null"); - open(STDIN,"<","/dev/null"); - # Run a sleep that outputs data, so it will discover if the ssh connection closes. - my $sleep = ::shell_quote_scalar('$|=1;while(1){sleep 1;print "foo\n"}'); - my @master = ("ssh", "-tt", "-MTS", $control_path, $serverlogin, "perl", "-e", $sleep); - exec(@master); - } - } - } else { - $sshcmd = "ssh"; $serverlogin = $self->{'string'}; - } - } - $self->{'sshcommand'} = $sshcmd; - $self->{'serverlogin'} = $serverlogin; -} - -sub control_path_dir { - # Returns: - # path to directory - my $self = shift; - if(not defined $self->{'control_path_dir'}) { - -e $ENV{'HOME'}."/.parallel" or mkdir $ENV{'HOME'}."/.parallel"; - -e $ENV{'HOME'}."/.parallel/tmp" or mkdir $ENV{'HOME'}."/.parallel/tmp"; - $self->{'control_path_dir'} = - File::Temp::tempdir($ENV{'HOME'} - . "/.parallel/tmp/control_path_dir-XXXX", - CLEANUP => 1); - } - return $self->{'control_path_dir'}; -} - -sub rsync_transfer_cmd { - # Command to run to transfer a file - # Input: - # $file = filename of file to transfer - # $workdir = destination dir - # Returns: - # $cmd = rsync command to run to transfer $file ("" if unreadable) - my $self = shift; - my $file = shift; - my $workdir = shift; - if(not -r $file) { - ::warning($file, " is not readable and will not be transferred.\n"); - return "true"; - } - my $rsync_destdir; - if($file =~ m:^/:) { - # rsync /foo/bar / - $rsync_destdir = "/"; - } else { - $rsync_destdir = ::shell_quote_file($workdir); - } - $file = ::shell_quote_file($file); - my $sshcmd = $self->sshcommand(); - my $rsync_opt = "-rlDzR -e" . ::shell_quote_scalar($sshcmd); - my $serverlogin = $self->serverlogin(); - # Make dir if it does not exist - return "( $sshcmd $serverlogin mkdir -p $rsync_destdir;" . - rsync()." $rsync_opt $file $serverlogin:$rsync_destdir )"; -} - -sub cleanup_cmd { - # Command to run to remove the remote file - # Input: - # $file = filename to remove - # $workdir = destination dir - # Returns: - # $cmd = ssh command to run to remove $file and empty parent dirs - my $self = shift; - my $file = shift; - my $workdir = shift; - my $f = $file; - if($f =~ m:/\./:) { - # foo/bar/./baz/quux => workdir/baz/quux - # /foo/bar/./baz/quux => workdir/baz/quux - $f =~ s:.*/\./:$workdir/:; - } elsif($f =~ m:^[^/]:) { - # foo/bar => workdir/foo/bar - $f = $workdir."/".$f; - } - my @subdirs = split m:/:, ::dirname($f); - my @rmdir; - my $dir = ""; - for(@subdirs) { - $dir .= $_."/"; - unshift @rmdir, ::shell_quote_file($dir); - } - my $rmdir = @rmdir ? "rmdir @rmdir 2>/dev/null;" : ""; - if(defined $opt::workdir and $opt::workdir eq "...") { - $rmdir .= "rm -rf " . ::shell_quote_file($workdir).';'; - } - - $f = ::shell_quote_file($f); - my $sshcmd = $self->sshcommand(); - my $serverlogin = $self->serverlogin(); - return "$sshcmd $serverlogin ".::shell_quote_scalar("(rm -f $f; $rmdir)"); -} - -{ - my $rsync; - - sub rsync { - # rsync 3.1.x uses protocol 31 which is unsupported by 2.5.7. - # If the version >= 3.1.0: downgrade to protocol 30 - if(not $rsync) { - my @out = `rsync --version`; - for (@out) { - if(/version (\d+.\d+)(.\d+)?/) { - if($1 >= 3.1) { - # Version 3.1.0 or later: Downgrade to protocol 30 - $rsync = "rsync --protocol 30"; - } else { - $rsync = "rsync"; - } - } - } - $rsync or ::die_bug("Cannot figure out version of rsync: @out"); - } - return $rsync; - } -} - - -package JobQueue; - -sub new { - my $class = shift; - my $commandref = shift; - my $read_from = shift; - my $context_replace = shift; - my $max_number_of_args = shift; - my $return_files = shift; - my $commandlinequeue = CommandLineQueue->new - ($commandref, $read_from, $context_replace, $max_number_of_args, - $return_files); - my @unget = (); - return bless { - 'unget' => \@unget, - 'commandlinequeue' => $commandlinequeue, - 'total_jobs' => undef, - }, ref($class) || $class; -} - -sub get { - my $self = shift; - - if(@{$self->{'unget'}}) { - my $job = shift @{$self->{'unget'}}; - return ($job); - } else { - my $commandline = $self->{'commandlinequeue'}->get(); - if(defined $commandline) { - my $job = Job->new($commandline); - return $job; - } else { - return undef; - } - } -} - -sub unget { - my $self = shift; - unshift @{$self->{'unget'}}, @_; -} - -sub empty { - my $self = shift; - my $empty = (not @{$self->{'unget'}}) - && $self->{'commandlinequeue'}->empty(); - ::debug("run", "JobQueue->empty $empty "); - return $empty; -} - -sub total_jobs { - my $self = shift; - if(not defined $self->{'total_jobs'}) { - my $job; - my @queue; - my $start = time; - while($job = $self->get()) { - if(time - $start > 10) { - ::warning("Reading all arguments takes longer than 10 seconds.\n"); - $opt::eta && ::warning("Consider removing --eta.\n"); - $opt::bar && ::warning("Consider removing --bar.\n"); - last; - } - push @queue, $job; - } - while($job = $self->get()) { - push @queue, $job; - } - - $self->unget(@queue); - $self->{'total_jobs'} = $#queue+1; - } - return $self->{'total_jobs'}; -} - -sub next_seq { - my $self = shift; - - return $self->{'commandlinequeue'}->seq(); -} - -sub quote_args { - my $self = shift; - return $self->{'commandlinequeue'}->quote_args(); -} - - -package Job; - -sub new { - my $class = shift; - my $commandlineref = shift; - return bless { - 'commandline' => $commandlineref, # CommandLine object - 'workdir' => undef, # --workdir - 'stdin' => undef, # filehandle for stdin (used for --pipe) - # filename for writing stdout to (used for --files) - 'remaining' => "", # remaining data not sent to stdin (used for --pipe) - 'datawritten' => 0, # amount of data sent via stdin (used for --pipe) - 'transfersize' => 0, # size of files using --transfer - 'returnsize' => 0, # size of files using --return - 'pid' => undef, - # hash of { SSHLogins => number of times the command failed there } - 'failed' => undef, - 'sshlogin' => undef, - # The commandline wrapped with rsync and ssh - 'sshlogin_wrap' => undef, - 'exitstatus' => undef, - 'exitsignal' => undef, - # Timestamp for timeout if any - 'timeout' => undef, - 'virgin' => 1, - }, ref($class) || $class; -} - -sub replaced { - my $self = shift; - $self->{'commandline'} or ::die_bug("commandline empty"); - return $self->{'commandline'}->replaced(); -} - -sub seq { - my $self = shift; - return $self->{'commandline'}->seq(); -} - -sub slot { - my $self = shift; - return $self->{'commandline'}->slot(); -} - -{ - my($cattail); - - sub cattail { - # Returns: - # $cattail = perl program for: cattail "decompress program" writerpid [file_to_decompress or stdin] [file_to_unlink] - if(not $cattail) { - $cattail = q{ - # cat followed by tail. - # If $writerpid dead: finish after this round - use Fcntl; - - $|=1; - - my ($cmd, $writerpid, $read_file, $unlink_file) = @ARGV; - if($read_file) { - open(IN,"<",$read_file) || die("cattail: Cannot open $read_file"); - } else { - *IN = *STDIN; - } - - my $flags; - fcntl(IN, F_GETFL, $flags) || die $!; # Get the current flags on the filehandle - $flags |= O_NONBLOCK; # Add non-blocking to the flags - fcntl(IN, F_SETFL, $flags) || die $!; # Set the flags on the filehandle - open(OUT,"|-",$cmd) || die("cattail: Cannot run $cmd"); - - while(1) { - # clear EOF - seek(IN,0,1); - my $writer_running = kill 0, $writerpid; - $read = sysread(IN,$buf,32768); - if($read) { - # We can unlink the file now: The writer has written something - -e $unlink_file and unlink $unlink_file; - # Blocking print - while($buf) { - my $bytes_written = syswrite(OUT,$buf); - # syswrite may be interrupted by SIGHUP - substr($buf,0,$bytes_written) = ""; - } - # Something printed: Wait less next time - $sleep /= 2; - } else { - if(eof(IN) and not $writer_running) { - # Writer dead: There will never be more to read => exit - exit; - } - # TODO This could probably be done more efficiently using select(2) - # Nothing read: Wait longer before next read - # Up to 30 milliseconds - $sleep = ($sleep < 30) ? ($sleep * 1.001 + 0.01) : ($sleep); - usleep($sleep); - } - } - - sub usleep { - # Sleep this many milliseconds. - my $secs = shift; - select(undef, undef, undef, $secs/1000); - } - }; - $cattail =~ s/#.*//mg; - $cattail =~ s/\s+/ /g; - } - return $cattail; - } -} - -sub openoutputfiles { - # Open files for STDOUT and STDERR - # Set file handles in $self->fh - my $self = shift; - my ($outfhw, $errfhw, $outname, $errname); - if($opt::results) { - my $args_as_dirname = $self->{'commandline'}->args_as_dirname(); - # Output in: prefix/name1/val1/name2/val2/stdout - my $dir = $opt::results."/".$args_as_dirname; - if(eval{ File::Path::mkpath($dir); }) { - # OK - } else { - # mkpath failed: Argument probably too long. - # Set $Global::max_file_length, which will keep the individual - # dir names shorter than the max length - max_file_name_length($opt::results); - $args_as_dirname = $self->{'commandline'}->args_as_dirname(); - # prefix/name1/val1/name2/val2/ - $dir = $opt::results."/".$args_as_dirname; - File::Path::mkpath($dir); - } - # prefix/name1/val1/name2/val2/stdout - $outname = "$dir/stdout"; - if(not open($outfhw, "+>", $outname)) { - ::error("Cannot write to `$outname'.\n"); - ::wait_and_exit(255); - } - # prefix/name1/val1/name2/val2/stderr - $errname = "$dir/stderr"; - if(not open($errfhw, "+>", $errname)) { - ::error("Cannot write to `$errname'.\n"); - ::wait_and_exit(255); - } - $self->set_fh(1,"unlink",""); - $self->set_fh(2,"unlink",""); - } elsif(not $opt::ungroup) { - # To group we create temporary files for STDOUT and STDERR - # To avoid the cleanup unlink the files immediately (but keep them open) - if(@Global::tee_jobs) { - # files must be removed when the tee is done - } elsif($opt::files) { - ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par"); - ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par"); - # --files => only remove stderr - $self->set_fh(1,"unlink",""); - $self->set_fh(2,"unlink",$errname); - } else { - ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par"); - ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par"); - $self->set_fh(1,"unlink",$outname); - $self->set_fh(2,"unlink",$errname); - } - } else { - # --ungroup - open($outfhw,">&",$Global::fd{1}) || die; - open($errfhw,">&",$Global::fd{2}) || die; - # File name must be empty as it will otherwise be printed - $outname = ""; - $errname = ""; - $self->set_fh(1,"unlink",$outname); - $self->set_fh(2,"unlink",$errname); - } - # Set writing FD - $self->set_fh(1,'w',$outfhw); - $self->set_fh(2,'w',$errfhw); - $self->set_fh(1,'name',$outname); - $self->set_fh(2,'name',$errname); - if($opt::compress) { - # Send stdout to stdin for $opt::compress_program(1) - # Send stderr to stdin for $opt::compress_program(2) - # cattail get pid: $pid = $self->fh($fdno,'rpid'); - my $cattail = cattail(); - for my $fdno (1,2) { - my $wpid = open(my $fdw,"|-","$opt::compress_program >>". - $self->fh($fdno,'name')) || die $?; - $self->set_fh($fdno,'w',$fdw); - $self->set_fh($fdno,'wpid',$wpid); - my $rpid = open(my $fdr, "-|", "perl", "-e", $cattail, - $opt::decompress_program, $wpid, - $self->fh($fdno,'name'),$self->fh($fdno,'unlink')) || die $?; - $self->set_fh($fdno,'r',$fdr); - $self->set_fh($fdno,'rpid',$rpid); - } - } elsif(not $opt::ungroup) { - # Set reading FD if using --group (--ungroup does not need) - for my $fdno (1,2) { - # Re-open the file for reading - # so fdw can be closed separately - # and fdr can be seeked separately (for --line-buffer) - open(my $fdr,"<", $self->fh($fdno,'name')) || - ::die_bug("fdr: Cannot open ".$self->fh($fdno,'name')); - $self->set_fh($fdno,'r',$fdr); - # Unlink if required - $Global::debug or unlink $self->fh($fdno,"unlink"); - } - } - if($opt::linebuffer) { - # Set non-blocking when using --linebuffer - $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;"; - for my $fdno (1,2) { - my $fdr = $self->fh($fdno,'r'); - my $flags; - fcntl($fdr, &F_GETFL, $flags) || die $!; # Get the current flags on the filehandle - $flags |= &O_NONBLOCK; # Add non-blocking to the flags - fcntl($fdr, &F_SETFL, $flags) || die $!; # Set the flags on the filehandle - } - } -} - -sub max_file_name_length { - # Figure out the max length of a subdir - # TODO and the max total length - # Ext4 = 255,130816 - my $testdir = shift; - - my $upper = 8_000_000; - my $len = 8; - my $dir="x"x$len; - do { - rmdir($testdir."/".$dir); - $len *= 16; - $dir="x"x$len; - } while (mkdir $testdir."/".$dir); - # Then search for the actual max length between $len/16 and $len - my $min = $len/16; - my $max = $len; - while($max-$min > 5) { - # If we are within 5 chars of the exact value: - # it is not worth the extra time to find the exact value - my $test = int(($min+$max)/2); - $dir="x"x$test; - if(mkdir $testdir."/".$dir) { - rmdir($testdir."/".$dir); - $min = $test; - } else { - $max = $test; - } - } - $Global::max_file_length = $min; - return $min; -} - -sub set_fh { - # Set file handle - my ($self, $fd_no, $key, $fh) = @_; - $self->{'fd'}{$fd_no,$key} = $fh; -} - -sub fh { - # Get file handle - my ($self, $fd_no, $key) = @_; - return $self->{'fd'}{$fd_no,$key}; -} - -sub write { - my $self = shift; - my $remaining_ref = shift; - my $stdin_fh = $self->fh(0,"w"); - syswrite($stdin_fh,$$remaining_ref); -} - -sub set_stdin_buffer { - # Copy stdin buffer from $block_ref up to $endpos - # Prepend with $header_ref - # Remove $recstart and $recend if needed - # Input: - # $header_ref = ref to $header to prepend - # $block_ref = ref to $block to pass on - # $endpos = length of $block to pass on - # $recstart = --recstart regexp - # $recend = --recend regexp - # Returns: - # N/A - my $self = shift; - my ($header_ref,$block_ref,$endpos,$recstart,$recend) = @_; - $self->{'stdin_buffer'} = ($self->virgin() ? $$header_ref : "").substr($$block_ref,0,$endpos); - if($opt::remove_rec_sep) { - remove_rec_sep(\$self->{'stdin_buffer'},$recstart,$recend); - } - $self->{'stdin_buffer_length'} = length $self->{'stdin_buffer'}; - $self->{'stdin_buffer_pos'} = 0; -} - -sub stdin_buffer_length { - my $self = shift; - return $self->{'stdin_buffer_length'}; -} - -sub remove_rec_sep { - my ($block_ref,$recstart,$recend) = @_; - # Remove record separator - $$block_ref =~ s/$recend$recstart//gos; - $$block_ref =~ s/^$recstart//os; - $$block_ref =~ s/$recend$//os; -} - -sub non_block_write { - my $self = shift; - my $something_written = 0; - use POSIX qw(:errno_h); -# use Fcntl; -# my $flags = ''; - for my $buf (substr($self->{'stdin_buffer'},$self->{'stdin_buffer_pos'})) { - my $in = $self->fh(0,"w"); -# fcntl($in, F_GETFL, $flags) -# or die "Couldn't get flags for HANDLE : $!\n"; -# $flags |= O_NONBLOCK; -# fcntl($in, F_SETFL, $flags) -# or die "Couldn't set flags for HANDLE: $!\n"; - my $rv = syswrite($in, $buf); - if (!defined($rv) && $! == EAGAIN) { - # would block - $something_written = 0; - } elsif ($self->{'stdin_buffer_pos'}+$rv != $self->{'stdin_buffer_length'}) { - # incomplete write - # Remove the written part - $self->{'stdin_buffer_pos'} += $rv; - $something_written = $rv; - } else { - # successfully wrote everything - my $a=""; - $self->set_stdin_buffer(\$a,\$a,"",""); - $something_written = $rv; - } - } - - ::debug("pipe", "Non-block: ", $something_written); - return $something_written; -} - - -sub virgin { - my $self = shift; - return $self->{'virgin'}; -} - -sub set_virgin { - my $self = shift; - $self->{'virgin'} = shift; -} - -sub pid { - my $self = shift; - return $self->{'pid'}; -} - -sub set_pid { - my $self = shift; - $self->{'pid'} = shift; -} - -sub starttime { - # Returns: - # UNIX-timestamp this job started - my $self = shift; - return sprintf("%.3f",$self->{'starttime'}); -} - -sub set_starttime { - my $self = shift; - my $starttime = shift || ::now(); - $self->{'starttime'} = $starttime; -} - -sub runtime { - # Returns: - # Run time in seconds - my $self = shift; - return sprintf("%.3f",int(($self->endtime() - $self->starttime())*1000)/1000); -} - -sub endtime { - # Returns: - # UNIX-timestamp this job ended - # 0 if not ended yet - my $self = shift; - return ($self->{'endtime'} || 0); -} - -sub set_endtime { - my $self = shift; - my $endtime = shift; - $self->{'endtime'} = $endtime; -} - -sub timedout { - # Is the job timedout? - # Input: - # $delta_time = time that the job may run - # Returns: - # True or false - my $self = shift; - my $delta_time = shift; - return time > $self->{'starttime'} + $delta_time; -} - -sub kill { - # Kill the job. - # Send the signals to (grand)*children and pid. - # If no signals: TERM TERM KILL - # Wait 200 ms after each TERM. - # Input: - # @signals = signals to send - my $self = shift; - my @signals = @_; - my @family_pids = $self->family_pids(); - # Record this jobs as failed - $self->set_exitstatus(-1); - # Send two TERMs to give time to clean up - ::debug("run", "Kill seq ", $self->seq(), "\n"); - my @send_signals = @signals || ("TERM", "TERM", "KILL"); - for my $signal (@send_signals) { - my $alive = 0; - for my $pid (@family_pids) { - if(kill 0, $pid) { - # The job still running - kill $signal, $pid; - $alive = 1; - } - } - # If a signal was given as input, do not do the sleep below - @signals and next; - - if($signal eq "TERM" and $alive) { - # Wait up to 200 ms between TERMs - but only if any pids are alive - my $sleep = 1; - for (my $sleepsum = 0; kill 0, $family_pids[0] and $sleepsum < 200; - $sleepsum += $sleep) { - $sleep = ::reap_usleep($sleep); - } - } - } -} - -sub family_pids { - # Find the pids with this->pid as (grand)*parent - # Returns: - # @pids = pids of (grand)*children - my $self = shift; - my $pid = $self->pid(); - my @pids; - - my ($children_of_ref, $parent_of_ref, $name_of_ref) = ::pid_table(); - - my @more = ($pid); - # While more (grand)*children - while(@more) { - my @m; - push @pids, @more; - for my $parent (@more) { - if($children_of_ref->{$parent}) { - # add the children of this parent - push @m, @{$children_of_ref->{$parent}}; - } - } - @more = @m; - } - return (@pids); -} - -sub failed { - # return number of times failed for this $sshlogin - # Input: - # $sshlogin - # Returns: - # Number of times failed for $sshlogin - my $self = shift; - my $sshlogin = shift; - return $self->{'failed'}{$sshlogin}; -} - -sub failed_here { - # return number of times failed for the current $sshlogin - # Returns: - # Number of times failed for this sshlogin - my $self = shift; - return $self->{'failed'}{$self->sshlogin()}; -} - -sub add_failed { - # increase the number of times failed for this $sshlogin - my $self = shift; - my $sshlogin = shift; - $self->{'failed'}{$sshlogin}++; -} - -sub add_failed_here { - # increase the number of times failed for the current $sshlogin - my $self = shift; - $self->{'failed'}{$self->sshlogin()}++; -} - -sub reset_failed { - # increase the number of times failed for this $sshlogin - my $self = shift; - my $sshlogin = shift; - delete $self->{'failed'}{$sshlogin}; -} - -sub reset_failed_here { - # increase the number of times failed for this $sshlogin - my $self = shift; - delete $self->{'failed'}{$self->sshlogin()}; -} - -sub min_failed { - # Returns: - # the number of sshlogins this command has failed on - # the minimal number of times this command has failed - my $self = shift; - my $min_failures = - ::min(map { $self->{'failed'}{$_} } keys %{$self->{'failed'}}); - my $number_of_sshlogins_failed_on = scalar keys %{$self->{'failed'}}; - return ($number_of_sshlogins_failed_on,$min_failures); -} - -sub total_failed { - # Returns: - # $total_failures = the number of times this command has failed - my $self = shift; - my $total_failures = 0; - for (values %{$self->{'failed'}}) { - $total_failures += $_; - } - return $total_failures; -} - -sub wrapped { - # Wrap command with: - # * --shellquote - # * --nice - # * --cat - # * --fifo - # * --sshlogin - # * --pipepart (@Global::cat_partials) - # * --pipe - # * --tmux - # The ordering of the wrapping is important: - # * --nice/--cat/--fifo should be done on the remote machine - # * --pipepart/--pipe should be done on the local machine inside --tmux - # Uses: - # $Global::envvar - # $opt::shellquote - # $opt::nice - # $Global::shell - # $opt::cat - # $opt::fifo - # @Global::cat_partials - # $opt::pipe - # $opt::tmux - # Returns: - # $self->{'wrapped'} = the command wrapped with the above - my $self = shift; - if(not defined $self->{'wrapped'}) { - my $command = $Global::envvar.$self->replaced(); - if($opt::shellquote) { - # Prepend echo - # and quote twice - $command = "echo " . - ::shell_quote_scalar(::shell_quote_scalar($command)); - } - if($opt::nice) { - # Prepend \nice -n19 $SHELL -c - # and quote. - # The '\' before nice is needed to avoid tcsh's built-in - $command = '\nice'. " -n". $opt::nice. " ". - $Global::shell. " -c ". - ::shell_quote_scalar($command); - } - if($opt::cat) { - # Prepend 'cat > {};' - # Append '_EXIT=$?;(rm {};exit $_EXIT)' - $command = - $self->{'commandline'}->replace_placeholders(["cat > \257<\257>; "], 0, 0). - $command. - $self->{'commandline'}->replace_placeholders( - ["; _EXIT=\$?; rm \257<\257>; exit \$_EXIT"], 0, 0); - } elsif($opt::fifo) { - # Prepend 'mkfifo {}; (' - # Append ') & _PID=$!; cat > {}; wait $_PID; _EXIT=$?;(rm {};exit $_EXIT)' - $command = - $self->{'commandline'}->replace_placeholders(["mkfifo \257<\257>; ("], 0, 0). - $command. - $self->{'commandline'}->replace_placeholders([") & _PID=\$!; cat > \257<\257>; ", - "wait \$_PID; _EXIT=\$?; ", - "rm \257<\257>; exit \$_EXIT"], - 0,0); - } - # Wrap with ssh + tranferring of files - $command = $self->sshlogin_wrap($command); - if(@Global::cat_partials) { - # Prepend: - # < /tmp/foo perl -e 'while(@ARGV) { sysseek(STDIN,shift,0) || die; $left = shift; while($read = sysread(STDIN,$buf, ($left > 32768 ? 32768 : $left))){ $left -= $read; syswrite(STDOUT,$buf); } }' 0 0 0 11 | - $command = (shift @Global::cat_partials). "|". "(". $command. ")"; - } elsif($opt::pipe) { - # Prepend EOF-detector to avoid starting $command if EOF. - # The $tmpfile might exist if run on a remote system - we accept that risk - my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".chr"); - # Unlink to avoid leaving files if --dry-run or --sshlogin - unlink $tmpfile; - $command = - # Exit value: - # empty input = true - # some input = exit val from command - qq{ sh -c 'dd bs=1 count=1 of=$tmpfile 2>/dev/null'; }. - qq{ test \! -s "$tmpfile" && rm -f "$tmpfile" && exec true; }. - qq{ (cat $tmpfile; rm $tmpfile; cat - ) | }. - "($command);"; - } - if($opt::tmux) { - # Wrap command with 'tmux' - $command = $self->tmux_wrap($command); - } - $self->{'wrapped'} = $command; - } - return $self->{'wrapped'}; -} - -sub set_sshlogin { - my $self = shift; - my $sshlogin = shift; - $self->{'sshlogin'} = $sshlogin; - delete $self->{'sshlogin_wrap'}; # If sshlogin is changed the wrap is wrong - delete $self->{'wrapped'}; -} - -sub sshlogin { - my $self = shift; - return $self->{'sshlogin'}; -} - -sub sshlogin_wrap { - # Wrap the command with the commands needed to run remotely - # Returns: - # $self->{'sshlogin_wrap'} = command wrapped with ssh+transfer commands - my $self = shift; - my $command = shift; - if(not defined $self->{'sshlogin_wrap'}) { - my $sshlogin = $self->sshlogin(); - my $sshcmd = $sshlogin->sshcommand(); - my $serverlogin = $sshlogin->serverlogin(); - my ($pre,$post,$cleanup)=("","",""); - - if($serverlogin eq ":") { - # No transfer neeeded - $self->{'sshlogin_wrap'} = $command; - } else { - # --transfer - $pre .= $self->sshtransfer(); - # --return - $post .= $self->sshreturn(); - # --cleanup - $post .= $self->sshcleanup(); - if($post) { - # We need to save the exit status of the job - $post = '_EXIT_status=$?; ' . $post . ' exit $_EXIT_status;'; - } - # If the remote login shell is (t)csh then use 'setenv' - # otherwise use 'export' - # We cannot use parse_env_var(), as PARALLEL_SEQ changes - # for each command - my $parallel_env = - ($Global::envwarn - . q{ 'eval `echo $SHELL | grep "/t\\{0,1\\}csh" > /dev/null } - . q{ && echo setenv PARALLEL_SEQ '$PARALLEL_SEQ'\; } - . q{ setenv PARALLEL_PID '$PARALLEL_PID' } - . q{ || echo PARALLEL_SEQ='$PARALLEL_SEQ'\;export PARALLEL_SEQ\; } - . q{ PARALLEL_PID='$PARALLEL_PID'\;export PARALLEL_PID` ;' }); - my $remote_pre = ""; - my $ssh_options = ""; - if(($opt::pipe or $opt::pipepart) and $opt::ctrlc - or - not ($opt::pipe or $opt::pipepart) and not $opt::noctrlc) { - # TODO Determine if this is needed - # Propagating CTRL-C to kill remote jobs requires - # remote jobs to be run with a terminal. - $ssh_options = "-tt -oLogLevel=quiet"; -# $ssh_options = ""; - # tty - check if we have a tty. - # stty: - # -onlcr - make output 8-bit clean - # isig - pass CTRL-C as signal - # -echo - do not echo input - $remote_pre .= ::shell_quote_scalar('tty >/dev/null && stty isig -onlcr -echo;'); - } - if($opt::workdir) { - my $wd = ::shell_quote_file($self->workdir()); - $remote_pre .= ::shell_quote_scalar("mkdir -p ") . $wd . - ::shell_quote_scalar("; cd ") . $wd . - # exit 255 (instead of exec false) would be the correct thing, - # but that fails on tcsh - ::shell_quote_scalar(qq{ || exec false;}); - } - # This script is to solve the problem of - # * not mixing STDERR and STDOUT - # * terminating with ctrl-c - # It works on Linux but not Solaris - # Finishes on Solaris, but wrong exit code: - # $SIG{CHLD} = sub {exit ($?&127 ? 128+($?&127) : 1+$?>>8)}; - # Hangs on Solaris, but correct exit code on Linux: - # $SIG{CHLD} = sub { $done = 1 }; - # $p->poll; - my $signal_script = "perl -e '". - q{ - use IO::Poll; - $SIG{CHLD} = sub { $done = 1 }; - $p = IO::Poll->new; - $p->mask(STDOUT, POLLHUP); - $pid=fork; unless($pid) {setpgrp; exec $ENV{SHELL}, "-c", @ARGV; die "exec: $!\n"} - $p->poll; - kill SIGHUP, -${pid} unless $done; - wait; exit ($?&127 ? 128+($?&127) : 1+$?>>8) - } . "' "; - $signal_script =~ s/\s+/ /g; - - $self->{'sshlogin_wrap'} = - ($pre - . "$sshcmd $ssh_options $serverlogin $parallel_env " - . $remote_pre -# . ::shell_quote_scalar($signal_script . ::shell_quote_scalar($command)) - . ::shell_quote_scalar($command) - . ";" - . $post); - } - } - return $self->{'sshlogin_wrap'}; -} - -sub transfer { - # Files to transfer - # Returns: - # @transfer - File names of files to transfer - my $self = shift; - my @transfer = (); - $self->{'transfersize'} = 0; - if($opt::transfer) { - for my $record (@{$self->{'commandline'}{'arg_list'}}) { - # Merge arguments from records into args - for my $arg (@$record) { - CORE::push @transfer, $arg->orig(); - # filesize - if(-e $arg->orig()) { - $self->{'transfersize'} += (stat($arg->orig()))[7]; - } - } - } - } - return @transfer; -} - -sub transfersize { - my $self = shift; - return $self->{'transfersize'}; -} - -sub sshtransfer { - # Returns for each transfer file: - # rsync $file remote:$workdir - my $self = shift; - my @pre; - my $sshlogin = $self->sshlogin(); - my $workdir = $self->workdir(); - for my $file ($self->transfer()) { - push @pre, $sshlogin->rsync_transfer_cmd($file,$workdir).";"; - } - return join("",@pre); -} - -sub return { - # Files to return - # Non-quoted and with {...} substituted - # Returns: - # @non_quoted_filenames - my $self = shift; - return $self->{'commandline'}-> - replace_placeholders($self->{'commandline'}{'return_files'},0,0); -} - -sub returnsize { - # This is called after the job has finished - # Returns: - # $number_of_bytes transferred in return - my $self = shift; - for my $file ($self->return()) { - if(-e $file) { - $self->{'returnsize'} += (stat($file))[7]; - } - } - return $self->{'returnsize'}; -} - -sub sshreturn { - # Returns for each return-file: - # rsync remote:$workdir/$file . - my $self = shift; - my $sshlogin = $self->sshlogin(); - my $sshcmd = $sshlogin->sshcommand(); - my $serverlogin = $sshlogin->serverlogin(); - my $rsync_opt = "-rlDzR -e".::shell_quote_scalar($sshcmd); - my $pre = ""; - for my $file ($self->return()) { - $file =~ s:^\./::g; # Remove ./ if any - my $relpath = ($file !~ m:^/:); # Is the path relative? - my $cd = ""; - my $wd = ""; - if($relpath) { - # rsync -avR /foo/./bar/baz.c remote:/tmp/ - # == (on old systems) - # rsync -avR --rsync-path="cd /foo; rsync" remote:bar/baz.c /tmp/ - $wd = ::shell_quote_file($self->workdir()."/"); - } - # Only load File::Basename if actually needed - $Global::use{"File::Basename"} ||= eval "use File::Basename; 1;"; - # dir/./file means relative to dir, so remove dir on remote - $file =~ m:(.*)/\./:; - my $basedir = $1 ? ::shell_quote_file($1."/") : ""; - my $nobasedir = $file; - $nobasedir =~ s:.*/\./::; - $cd = ::shell_quote_file(::dirname($nobasedir)); - my $rsync_cd = '--rsync-path='.::shell_quote_scalar("cd $wd$cd; rsync"); - my $basename = ::shell_quote_scalar(::shell_quote_file(basename($file))); - # --return - # mkdir -p /home/tange/dir/subdir/; - # rsync (--protocol 30) -rlDzR --rsync-path="cd /home/tange/dir/subdir/; rsync" - # server:file.gz /home/tange/dir/subdir/ - $pre .= "mkdir -p $basedir$cd; ".$sshlogin->rsync()." $rsync_cd $rsync_opt $serverlogin:". - $basename . " ".$basedir.$cd.";"; - } - return $pre; -} - -sub sshcleanup { - # Return the sshcommand needed to remove the file - # Returns: - # ssh command needed to remove files from sshlogin - my $self = shift; - my $sshlogin = $self->sshlogin(); - my $sshcmd = $sshlogin->sshcommand(); - my $serverlogin = $sshlogin->serverlogin(); - my $workdir = $self->workdir(); - my $cleancmd = ""; - - for my $file ($self->cleanup()) { - my @subworkdirs = parentdirs_of($file); - $cleancmd .= $sshlogin->cleanup_cmd($file,$workdir).";"; - } - if(defined $opt::workdir and $opt::workdir eq "...") { - $cleancmd .= "$sshcmd $serverlogin rm -rf " . ::shell_quote_scalar($workdir).';'; - } - return $cleancmd; -} - -sub cleanup { - # Returns: - # Files to remove at cleanup - my $self = shift; - if($opt::cleanup) { - my @transfer = $self->transfer(); - my @return = $self->return(); - return (@transfer,@return); - } else { - return (); - } -} - -sub workdir { - # Returns: - # the workdir on a remote machine - my $self = shift; - if(not defined $self->{'workdir'}) { - my $workdir; - if(defined $opt::workdir) { - if($opt::workdir eq ".") { - # . means current dir - my $home = $ENV{'HOME'}; - eval 'use Cwd'; - my $cwd = cwd(); - $workdir = $cwd; - if($home) { - # If homedir exists: remove the homedir from - # workdir if cwd starts with homedir - # E.g. /home/foo/my/dir => my/dir - # E.g. /tmp/my/dir => /tmp/my/dir - my ($home_dev, $home_ino) = (stat($home))[0,1]; - my $parent = ""; - my @dir_parts = split(m:/:,$cwd); - my $part; - while(defined ($part = shift @dir_parts)) { - $part eq "" and next; - $parent .= "/".$part; - my ($parent_dev, $parent_ino) = (stat($parent))[0,1]; - if($parent_dev == $home_dev and $parent_ino == $home_ino) { - # dev and ino is the same: We found the homedir. - $workdir = join("/",@dir_parts); - last; - } - } - } - if($workdir eq "") { - $workdir = "."; - } - } elsif($opt::workdir eq "...") { - $workdir = ".parallel/tmp/" . ::hostname() . "-" . $$ - . "-" . $self->seq(); - } else { - $workdir = $opt::workdir; - # Rsync treats /./ special. We don't want that - $workdir =~ s:/\./:/:g; # Remove /./ - $workdir =~ s:/+$::; # Remove ending / if any - $workdir =~ s:^\./::g; # Remove starting ./ if any - } - } else { - $workdir = "."; - } - $self->{'workdir'} = ::shell_quote_scalar($workdir); - } - return $self->{'workdir'}; -} - -sub parentdirs_of { - # Return: - # all parentdirs except . of this dir or file - sorted desc by length - my $d = shift; - my @parents = (); - while($d =~ s:/[^/]+$::) { - if($d ne ".") { - push @parents, $d; - } - } - return @parents; -} - -sub start { - # Setup STDOUT and STDERR for a job and start it. - # Returns: - # job-object or undef if job not to run - my $job = shift; - # Get the shell command to be executed (possibly with ssh infront). - my $command = $job->wrapped(); - - if($Global::interactive or $Global::stderr_verbose) { - if($Global::interactive) { - print $Global::original_stderr "$command ?..."; - open(my $tty_fh, "<", "/dev/tty") || ::die_bug("interactive-tty"); - my $answer = <$tty_fh>; - close $tty_fh; - my $run_yes = ($answer =~ /^\s*y/i); - if (not $run_yes) { - $command = "true"; # Run the command 'true' - } - } else { - print $Global::original_stderr "$command\n"; - } - } - - my $pid; - $job->openoutputfiles(); - my($stdout_fh,$stderr_fh) = ($job->fh(1,"w"),$job->fh(2,"w")); - local (*IN,*OUT,*ERR); - open OUT, '>&', $stdout_fh or ::die_bug("Can't redirect STDOUT: $!"); - open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDOUT: $!"); - - if(($opt::dryrun or $Global::verbose) and $opt::ungroup) { - if($Global::verbose <= 1) { - print $stdout_fh $job->replaced(),"\n"; - } else { - # Verbose level > 1: Print the rsync and stuff - print $stdout_fh $command,"\n"; - } - } - if($opt::dryrun) { - $command = "true"; - } - $ENV{'PARALLEL_SEQ'} = $job->seq(); - $ENV{'PARALLEL_PID'} = $$; - ::debug("run", $Global::total_running, " processes . Starting (", - $job->seq(), "): $command\n"); - if($opt::pipe) { - my ($stdin_fh); - # The eval is needed to catch exception from open3 - eval { - $pid = ::open3($stdin_fh, ">&OUT", ">&ERR", $Global::shell, "-c", $command) || - ::die_bug("open3-pipe"); - 1; - }; - $job->set_fh(0,"w",$stdin_fh); - } elsif(@opt::a and not $Global::stdin_in_opt_a and $job->seq() == 1 - and $job->sshlogin()->string() eq ":") { - # Give STDIN to the first job if using -a (but only if running - # locally - otherwise CTRL-C does not work for other jobs Bug#36585) - *IN = *STDIN; - # The eval is needed to catch exception from open3 - eval { - $pid = ::open3("<&IN", ">&OUT", ">&ERR", $Global::shell, "-c", $command) || - ::die_bug("open3-a"); - 1; - }; - # Re-open to avoid complaining - open(STDIN, "<&", $Global::original_stdin) - or ::die_bug("dup-\$Global::original_stdin: $!"); - } elsif ($opt::tty and not $Global::tty_taken and -c "/dev/tty" and - open(my $devtty_fh, "<", "/dev/tty")) { - # Give /dev/tty to the command if no one else is using it - *IN = $devtty_fh; - # The eval is needed to catch exception from open3 - eval { - $pid = ::open3("<&IN", ">&OUT", ">&ERR", $Global::shell, "-c", $command) || - ::die_bug("open3-/dev/tty"); - $Global::tty_taken = $pid; - close $devtty_fh; - 1; - }; - } else { - # The eval is needed to catch exception from open3 - eval { - $pid = ::open3(::gensym, ">&OUT", ">&ERR", $Global::shell, "-c", $command) || - ::die_bug("open3-gensym"); - 1; - }; - } - if($pid) { - # A job was started - $Global::total_running++; - $Global::total_started++; - $job->set_pid($pid); - $job->set_starttime(); - $Global::running{$job->pid()} = $job; - if($opt::timeout) { - $Global::timeoutq->insert($job); - } - $Global::newest_job = $job; - $Global::newest_starttime = ::now(); - return $job; - } else { - # No more processes - ::debug("run", "Cannot spawn more jobs.\n"); - return undef; - } -} - -sub tmux_wrap { - # Wrap command with tmux for session pPID - # Input: - # $actual_command = the actual command being run (incl ssh wrap) - my $self = shift; - my $actual_command = shift; - # Temporary file name. Used for fifo to communicate exit val - my ($fh, $tmpfile) = ::tmpfile(SUFFIX => ".tmx"); - $Global::unlink{$tmpfile}=1; - close $fh; - unlink $tmpfile; - my $visual_command = $self->replaced(); - my $title = $visual_command; - # ; causes problems - # ascii 194-245 annoys tmux - $title =~ tr/[\011-\016;\302-\365]//d; - - my $tmux; - if($Global::total_running == 0) { - $tmux = "tmux new-session -s p$$ -d -n ". - ::shell_quote_scalar($title); - print $Global::original_stderr "See output with: tmux attach -t p$$\n"; - } else { - $tmux = "tmux new-window -t p$$ -n ".::shell_quote_scalar($title); - } - return "mkfifo $tmpfile; $tmux ". - # Run in tmux - ::shell_quote_scalar( - "(".$actual_command.');(echo $?$status;echo 255) >'.$tmpfile."&". - "echo ".::shell_quote_scalar($visual_command).";". - "echo \007Job finished at: `date`;sleep 10"). - # Run outside tmux - # Read the first line from the fifo and use that as status code - "; exit `perl -ne 'unlink \$ARGV; 1..1 and print' $tmpfile` "; -} - -sub is_already_in_results { - # Do we already have results for this job? - # Returns: - # $job_already_run = bool whether there is output for this or not - my $job = $_[0]; - my $args_as_dirname = $job->{'commandline'}->args_as_dirname(); - # prefix/name1/val1/name2/val2/ - my $dir = $opt::results."/".$args_as_dirname; - ::debug("run", "Test $dir/stdout", -e "$dir/stdout", "\n"); - return -e "$dir/stdout"; -} - -sub is_already_in_joblog { - my $job = shift; - return vec($Global::job_already_run,$job->seq(),1); -} - -sub set_job_in_joblog { - my $job = shift; - vec($Global::job_already_run,$job->seq(),1) = 1; -} - -sub should_be_retried { - # Should this job be retried? - # Returns - # 0 - do not retry - # 1 - job queued for retry - my $self = shift; - if (not $opt::retries) { - return 0; - } - if(not $self->exitstatus()) { - # Completed with success. If there is a recorded failure: forget it - $self->reset_failed_here(); - return 0 - } else { - # The job failed. Should it be retried? - $self->add_failed_here(); - if($self->total_failed() == $opt::retries) { - # This has been retried enough - return 0; - } else { - # This command should be retried - $self->set_endtime(undef); - $Global::JobQueue->unget($self); - ::debug("run", "Retry ", $self->seq(), "\n"); - return 1; - } - } -} - -sub print { - # Print the output of the jobs - # Returns: N/A - - my $self = shift; - ::debug("print", ">>joboutput ", $self->replaced(), "\n"); - if($opt::dryrun) { - # Nothing was printed to this job: - # cleanup tmp files if --files was set - unlink $self->fh(1,"name"); - } - if($opt::pipe and $self->virgin()) { - # Skip --joblog, --dryrun, --verbose - } else { - if($Global::joblog and defined $self->{'exitstatus'}) { - # Add to joblog when finished - $self->print_joblog(); - } - - # Printing is only relevant for grouped/--line-buffer output. - $opt::ungroup and return; - # Check for disk full - exit_if_disk_full(); - - if(($opt::dryrun or $Global::verbose) - and - not $self->{'verbose_printed'}) { - $self->{'verbose_printed'}++; - if($Global::verbose <= 1) { - print STDOUT $self->replaced(),"\n"; - } else { - # Verbose level > 1: Print the rsync and stuff - print STDOUT $self->wrapped(),"\n"; - } - # If STDOUT and STDERR are merged, - # we want the command to be printed first - # so flush to avoid STDOUT being buffered - flush STDOUT; - } - } - for my $fdno (sort { $a <=> $b } keys %Global::fd) { - # Sort by file descriptor numerically: 1,2,3,..,9,10,11 - $fdno == 0 and next; - my $out_fd = $Global::fd{$fdno}; - my $in_fh = $self->fh($fdno,"r"); - if(not $in_fh) { - if(not $Job::file_descriptor_warning_printed{$fdno}++) { - # ::warning("File descriptor $fdno not defined\n"); - } - next; - } - ::debug("print", "File descriptor $fdno (", $self->fh($fdno,"name"), "):"); - if($opt::files) { - # If --compress: $in_fh must be closed first. - close $self->fh($fdno,"w"); - close $in_fh; - if($opt::pipe and $self->virgin()) { - # Nothing was printed to this job: - # cleanup unused tmp files if --files was set - for my $fdno (1,2) { - unlink $self->fh($fdno,"name"); - unlink $self->fh($fdno,"unlink"); - } - } elsif($fdno == 1 and $self->fh($fdno,"name")) { - print $out_fd $self->fh($fdno,"name"),"\n"; - } - } elsif($opt::linebuffer) { - # Line buffered print out - $self->linebuffer_print($fdno,$in_fh,$out_fd); - } else { - my $buf; - close $self->fh($fdno,"w"); - seek $in_fh, 0, 0; - # $in_fh is now ready for reading at position 0 - if($opt::tag or defined $opt::tagstring) { - my $tag = $self->tag(); - if($fdno == 2) { - # OpenSSH_3.6.1p2 gives 'tcgetattr: Invalid argument' with -tt - # This is a crappy way of ignoring it. - while(<$in_fh>) { - if(/^(client_process_control: )?tcgetattr: Invalid argument\n/) { - # Skip - } else { - print $out_fd $tag,$_; - } - # At most run the loop once - last; - } - } - while(<$in_fh>) { - print $out_fd $tag,$_; - } - } else { - my $buf; - if($fdno == 2) { - # OpenSSH_3.6.1p2 gives 'tcgetattr: Invalid argument' with -tt - # This is a crappy way of ignoring it. - sysread($in_fh,$buf,1_000); - $buf =~ s/^(client_process_control: )?tcgetattr: Invalid argument\n//; - print $out_fd $buf; - } - while(sysread($in_fh,$buf,32768)) { - print $out_fd $buf; - } - } - close $in_fh; - } - flush $out_fd; - } - ::debug("print", "<{'partial_line',$fdno}; - - if(defined $self->{'exitstatus'}) { - # If the job is dead: close printing fh. Needed for --compress - close $self->fh($fdno,"w"); - if($opt::compress) { - # Blocked reading in final round - $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;"; - for my $fdno (1,2) { - my $fdr = $self->fh($fdno,'r'); - my $flags; - fcntl($fdr, &F_GETFL, $flags) || die $!; # Get the current flags on the filehandle - $flags &= ~&O_NONBLOCK; # Remove non-blocking to the flags - fcntl($fdr, &F_SETFL, $flags) || die $!; # Set the flags on the filehandle - } - } - } - # This seek will clear EOF - seek $in_fh, tell($in_fh), 0; - # The read is non-blocking: The $in_fh is set to non-blocking. - # 32768 --tag = 5.1s - # 327680 --tag = 4.4s - # 1024000 --tag = 4.4s - # 3276800 --tag = 4.3s - # 32768000 --tag = 4.7s - # 10240000 --tag = 4.3s - while(read($in_fh,substr($$partial,length $$partial),3276800)) { - # Append to $$partial - # Find the last \n - my $i = rindex($$partial,"\n"); - if($i != -1) { - # One or more complete lines were found - if($fdno == 2 and not $self->{'printed_first_line',$fdno}++) { - # OpenSSH_3.6.1p2 gives 'tcgetattr: Invalid argument' with -tt - # This is a crappy way of ignoring it. - $$partial =~ s/^(client_process_control: )?tcgetattr: Invalid argument\n//; - # Length of partial line has changed: Find the last \n again - $i = rindex($$partial,"\n"); - } - if($opt::tag or defined $opt::tagstring) { - # Replace ^ with $tag within the full line - my $tag = $self->tag(); - substr($$partial,0,$i+1) =~ s/^/$tag/gm; - # Length of partial line has changed: Find the last \n again - $i = rindex($$partial,"\n"); - } - # Print up to and including the last \n - print $out_fd substr($$partial,0,$i+1); - # Remove the printed part - substr($$partial,0,$i+1)=""; - } - } - if(defined $self->{'exitstatus'}) { - # If the job is dead: print the remaining partial line - # read remaining - if($$partial and ($opt::tag or defined $opt::tagstring)) { - my $tag = $self->tag(); - $$partial =~ s/^/$tag/gm; - } - print $out_fd $$partial; - # Release the memory - $$partial = undef; - if($self->fh($fdno,"rpid") and CORE::kill 0, $self->fh($fdno,"rpid")) { - # decompress still running - } else { - # decompress done: close fh - close $in_fh; - } - } -} - -sub print_joblog { - my $self = shift; - my $cmd; - if($Global::verbose <= 1) { - $cmd = $self->replaced(); - } else { - # Verbose level > 1: Print the rsync and stuff - $cmd = "@command"; - } - print $Global::joblog - join("\t", $self->seq(), $self->sshlogin()->string(), - $self->starttime(), sprintf("%10.3f",$self->runtime()), - $self->transfersize(), $self->returnsize(), - $self->exitstatus(), $self->exitsignal(), $cmd - ). "\n"; - flush $Global::joblog; - $self->set_job_in_joblog(); -} - -sub tag { - my $self = shift; - if(not defined $self->{'tag'}) { - $self->{'tag'} = $self->{'commandline'}-> - replace_placeholders([$opt::tagstring],0,0)."\t"; - } - return $self->{'tag'}; -} - -sub hostgroups { - my $self = shift; - if(not defined $self->{'hostgroups'}) { - $self->{'hostgroups'} = $self->{'commandline'}->{'arg_list'}[0][0]->{'hostgroups'}; - } - return @{$self->{'hostgroups'}}; -} - -sub exitstatus { - my $self = shift; - return $self->{'exitstatus'}; -} - -sub set_exitstatus { - my $self = shift; - my $exitstatus = shift; - if($exitstatus) { - # Overwrite status if non-zero - $self->{'exitstatus'} = $exitstatus; - } else { - # Set status but do not overwrite - # Status may have been set by --timeout - $self->{'exitstatus'} ||= $exitstatus; - } -} - -sub exitsignal { - my $self = shift; - return $self->{'exitsignal'}; -} - -sub set_exitsignal { - my $self = shift; - my $exitsignal = shift; - $self->{'exitsignal'} = $exitsignal; -} - -{ - my ($disk_full_fh, $b8193, $name); - sub exit_if_disk_full { - # Checks if $TMPDIR is full by writing 8kb to a tmpfile - # If the disk is full: Exit immediately. - # Returns: - # N/A - if(not $disk_full_fh) { - ($disk_full_fh, $name) = ::tmpfile(SUFFIX => ".df"); - unlink $name; - $b8193 = "x"x8193; - } - # Linux does not discover if a disk is full if writing <= 8192 - # Tested on: - # bfs btrfs cramfs ext2 ext3 ext4 ext4dev jffs2 jfs minix msdos - # ntfs reiserfs tmpfs ubifs vfat xfs - # TODO this should be tested on different OS similar to this: - # - # doit() { - # sudo mount /dev/ram0 /mnt/loop; sudo chmod 1777 /mnt/loop - # seq 100000 | parallel --tmpdir /mnt/loop/ true & - # seq 6900000 > /mnt/loop/i && echo seq OK - # seq 6980868 > /mnt/loop/i - # seq 10000 > /mnt/loop/ii - # sleep 3 - # sudo umount /mnt/loop/ || sudo umount -l /mnt/loop/ - # echo >&2 - # } - print $disk_full_fh $b8193; - if(not $disk_full_fh - or - tell $disk_full_fh == 0) { - ::error("Output is incomplete. Cannot append to buffer file in $ENV{'TMPDIR'}. Is the disk full?\n"); - ::error("Change \$TMPDIR with --tmpdir or use --compress.\n"); - ::wait_and_exit(255); - } - truncate $disk_full_fh, 0; - seek($disk_full_fh, 0, 0) || die; - } -} - - -package CommandLine; - -sub new { - my $class = shift; - my $seq = shift; - my $commandref = shift; - $commandref || die; - my $arg_queue = shift; - my $context_replace = shift; - my $max_number_of_args = shift; # for -N and normal (-n1) - my $return_files = shift; - my $replacecount_ref = shift; - my $len_ref = shift; - my %replacecount = %$replacecount_ref; - my %len = %$len_ref; - for (keys %$replacecount_ref) { - # Total length of this replacement string {} replaced with all args - $len{$_} = 0; - } - return bless { - 'command' => $commandref, - 'seq' => $seq, - 'len' => \%len, - 'arg_list' => [], - 'arg_queue' => $arg_queue, - 'max_number_of_args' => $max_number_of_args, - 'replacecount' => \%replacecount, - 'context_replace' => $context_replace, - 'return_files' => $return_files, - 'replaced' => undef, - }, ref($class) || $class; -} - -sub seq { - my $self = shift; - return $self->{'seq'}; -} - -{ - my $max_slot_number; - - sub slot { - # Find the number of a free job slot and return it - # Uses: - # @Global::slots - # Returns: - # $jobslot = number of jobslot - my $self = shift; - if(not $self->{'slot'}) { - if(not @Global::slots) { - # $Global::max_slot_number will typically be $Global::max_jobs_running - push @Global::slots, ++$max_slot_number; - } - $self->{'slot'} = shift @Global::slots; - } - return $self->{'slot'}; - } -} - -sub populate { - # Add arguments from arg_queue until the number of arguments or - # max line length is reached - # Uses: - # $Global::minimal_command_line_length - # $opt::cat - # $opt::fifo - # $Global::JobQueue - # $opt::m - # $opt::X - # $CommandLine::already_spread - # $Global::max_jobs_running - # Returns: N/A - my $self = shift; - my $next_arg; - my $max_len = $Global::minimal_command_line_length || Limits::Command::max_length(); - - if($opt::cat or $opt::fifo) { - # Generate a tempfile name that will be used as {} - my($outfh,$name) = ::tmpfile(SUFFIX => ".pip"); - close $outfh; - # Unlink is needed if: ssh otheruser@localhost - unlink $name; - $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->unget([Arg->new($name)]); - } - - while (not $self->{'arg_queue'}->empty()) { - $next_arg = $self->{'arg_queue'}->get(); - if(not defined $next_arg) { - next; - } - $self->push($next_arg); - if($self->len() >= $max_len) { - # Command length is now > max_length - # If there are arguments: remove the last - # If there are no arguments: Error - # TODO stuff about -x opt_x - if($self->number_of_args() > 1) { - # There is something to work on - $self->{'arg_queue'}->unget($self->pop()); - last; - } else { - my $args = join(" ", map { $_->orig() } @$next_arg); - ::error("Command line too long (", - $self->len(), " >= ", - $max_len, - ") at number ", - $self->{'arg_queue'}->arg_number(), - ": ". - (substr($args,0,50))."...\n"); - $self->{'arg_queue'}->unget($self->pop()); - ::wait_and_exit(255); - } - } - - if(defined $self->{'max_number_of_args'}) { - if($self->number_of_args() >= $self->{'max_number_of_args'}) { - last; - } - } - } - if(($opt::m or $opt::X) and not $CommandLine::already_spread - and $self->{'arg_queue'}->empty() and $Global::max_jobs_running) { - # -m or -X and EOF => Spread the arguments over all jobslots - # (unless they are already spread) - $CommandLine::already_spread ||= 1; - if($self->number_of_args() > 1) { - $self->{'max_number_of_args'} = - ::ceil($self->number_of_args()/$Global::max_jobs_running); - $Global::JobQueue->{'commandlinequeue'}->{'max_number_of_args'} = - $self->{'max_number_of_args'}; - $self->{'arg_queue'}->unget($self->pop_all()); - while($self->number_of_args() < $self->{'max_number_of_args'}) { - $self->push($self->{'arg_queue'}->get()); - } - } - } -} - -sub push { - # Add one or more records as arguments - # Returns: N/A - my $self = shift; - my $record = shift; - push @{$self->{'arg_list'}}, $record; - - my $quote_arg = $Global::noquote ? 0 : not $Global::quoting; - my $rep; - for my $arg (@$record) { - if(defined $arg) { - for my $perlexpr (keys %{$self->{'replacecount'}}) { - # 50% faster than below - $self->{'len'}{$perlexpr} += length $arg->replace($perlexpr,$quote_arg,$self); - # $rep = $arg->replace($perlexpr,$quote_arg,$self); - # $self->{'len'}{$perlexpr} += length $rep; - # ::debug("length", "Length: ", length $rep, - # "(", $perlexpr, "=>", $rep, ")\n"); - } - } - } -} - -sub pop { - # Remove last argument - # Returns: - # the last record - my $self = shift; - my $record = pop @{$self->{'arg_list'}}; - my $quote_arg = $Global::noquote ? 0 : not $Global::quoting; - for my $arg (@$record) { - if(defined $arg) { - for my $perlexpr (keys %{$self->{'replacecount'}}) { - $self->{'len'}{$perlexpr} -= - length $arg->replace($perlexpr,$quote_arg,$self); - } - } - } - return $record; -} - -sub pop_all { - # Remove all arguments and zeros the length of replacement strings - # Returns: - # all records - my $self = shift; - my @popped = @{$self->{'arg_list'}}; - for my $replacement_string (keys %{$self->{'replacecount'}}) { - $self->{'len'}{$replacement_string} = 0; - } - $self->{'arg_list'} = []; - return @popped; -} - -sub number_of_args { - # The number of records - # Returns: - # number of records - my $self = shift; - # Ftq rudef oaawuq ime dqxqmeqp az 2011-01-24 mzp ime iaz nk MQhmd - # Mdzrvadp Nvmdymeaz az 2011-04-10. Ftue oaawuq dqxqmeqp az - # 2013-08-18 ue m nuf tmdpqd me kag tmhq fa geq daf14. Bxqmeq - # qymux oaawuq@fmzsq.pw itqz kag dqmp ftue. - # - # U my ftq ymuzfmuzqd ar m buqoq ar rdqq earfimdq omxxqp SZG - # Bmdmxxqx. Rdqq earfimdq sgmdmzfqqe kag mooqee fa ftq eagdoq - # oapq, ngf U tmhq nqqz iazpqduzs tai ymzk mofgmxxk _dqmp_ ftq - # eagdoq oapq. - # - # Fa fqef ftue U bgf uz m oayyqzf fqxxuzs bqabxq fa qymux yq itqz - # ftqk dqmp ftue. Ftq oayyqzf ime bgf uz m eqofuaz ar ftq oapq - # ftmf za azq iagxp xaaw fa ruj ad uybdahq ftq earfimdq - ea ftq - # eagdoq oapq qcguhmxqzf fa m pgefk oadzqd. Fa ymwq egdq ftq - # oayyqzf iagxp zaf etai gb ur eayq azq vgef sdqbbqp ftdagst ftq - # eagdoq oapq U daf13'qp ftq eagdoq oapq - # tffb://qz.iuwubqpum.ads/iuwu/DAF13 - # - # 2.5 yazfte xmfqd U dqoquhqp mz qymux rday eayqazq ita zaf azxk - # ymzmsqp fa ruzp ftq oayyqzf, ngf mxea ymzmsqp fa sgqee ftq oapq - # tmp fa nq daf13'qp. - # - # Ftue nduzse yq fa ftq oazoxgeuaz ftmf ftqdq _mdq_ bqabxq, ita - # mdq zaf mrruxumfqp iuft ftq bdavqof, ftmf iuxx dqmp ftq eagdoq - # oapq - ftagst uf ymk zaf tmbbqz hqdk arfqz. - # - # This is really the number of records - return $#{$self->{'arg_list'}}+1; -} - -sub number_of_recargs { - # The number of args in records - # Returns: - # number of args records - my $self = shift; - my $sum = 0; - my $nrec = scalar @{$self->{'arg_list'}}; - if($nrec) { - $sum = $nrec * (scalar @{$self->{'arg_list'}[0]}); - } - return $sum; -} - -sub args_as_string { - # Returns: - # all unmodified arguments joined with ' ' (similar to {}) - my $self = shift; - return (join " ", map { $_->orig() } - map { @$_ } @{$self->{'arg_list'}}); -} - -sub args_as_dirname { - # Returns: - # all unmodified arguments joined with '/' (similar to {}) - # \t \0 \\ and / are quoted as: \t \0 \\ \_ - # If $Global::max_file_length: Keep subdirs < $Global::max_file_length - my $self = shift; - my @res = (); - - for my $rec_ref (@{$self->{'arg_list'}}) { - # If headers are used, sort by them. - # Otherwise keep the order from the command line. - my @header_indexes_sorted = header_indexes_sorted($#$rec_ref+1); - for my $n (@header_indexes_sorted) { - CORE::push(@res, - $Global::input_source_header{$n}, - map { my $s = $_; - # \t \0 \\ and / are quoted as: \t \0 \\ \_ - $s =~ s/\\/\\\\/g; - $s =~ s/\t/\\t/g; - $s =~ s/\0/\\0/g; - $s =~ s:/:\\_:g; - if($Global::max_file_length) { - # Keep each subdir shorter than the longest - # allowed file name - $s = substr($s,0,$Global::max_file_length); - } - $s; } - $rec_ref->[$n-1]->orig()); - } - } - return join "/", @res; -} - -sub header_indexes_sorted { - # Sort headers first by number then by name. - # E.g.: 1a 1b 11a 11b - # Returns: - # Indexes of %Global::input_source_header sorted - my $max_col = shift; - - no warnings 'numeric'; - for my $col (1 .. $max_col) { - # Make sure the header is defined. If it is not: use column number - if(not defined $Global::input_source_header{$col}) { - $Global::input_source_header{$col} = $col; - } - } - my @header_indexes_sorted = sort { - # Sort headers numerically then asciibetically - $Global::input_source_header{$a} <=> $Global::input_source_header{$b} - or - $Global::input_source_header{$a} cmp $Global::input_source_header{$b} - } 1 .. $max_col; - return @header_indexes_sorted; -} - -sub len { - # Uses: - # $opt::shellquote - # The length of the command line with args substituted - my $self = shift; - my $len = 0; - # Add length of the original command with no args - # Length of command w/ all replacement args removed - $len += $self->{'len'}{'noncontext'} + @{$self->{'command'}} -1; - ::debug("length", "noncontext + command: $len\n"); - my $recargs = $self->number_of_recargs(); - if($self->{'context_replace'}) { - # Context is duplicated for each arg - $len += $recargs * $self->{'len'}{'context'}; - for my $replstring (keys %{$self->{'replacecount'}}) { - # If the replacements string is more than once: mulitply its length - $len += $self->{'len'}{$replstring} * - $self->{'replacecount'}{$replstring}; - ::debug("length", $replstring, " ", $self->{'len'}{$replstring}, "*", - $self->{'replacecount'}{$replstring}, "\n"); - } - # echo 11 22 33 44 55 66 77 88 99 1010 - # echo 1 2 3 4 5 6 7 8 9 10 1 2 3 4 5 6 7 8 9 10 - # 5 + ctxgrp*arg - ::debug("length", "Ctxgrp: ", $self->{'len'}{'contextgroups'}, - " Groups: ", $self->{'len'}{'noncontextgroups'}, "\n"); - # Add space between context groups - $len += ($recargs-1) * ($self->{'len'}{'contextgroups'}); - } else { - # Each replacement string may occur several times - # Add the length for each time - $len += 1*$self->{'len'}{'context'}; - ::debug("length", "context+noncontext + command: $len\n"); - for my $replstring (keys %{$self->{'replacecount'}}) { - # (space between regargs + length of replacement) - # * number this replacement is used - $len += ($recargs -1 + $self->{'len'}{$replstring}) * - $self->{'replacecount'}{$replstring}; - } - } - if($opt::nice) { - # Pessimistic length if --nice is set - # Worse than worst case: every char needs to be quoted with \ - $len *= 2; - } - if($Global::quoting) { - # Pessimistic length if -q is set - # Worse than worst case: every char needs to be quoted with \ - $len *= 2; - } - if($opt::shellquote) { - # Pessimistic length if --shellquote is set - # Worse than worst case: every char needs to be quoted with \ twice - $len *= 4; - } - # If we are using --env, add the prefix for that, too. - $len += $Global::envvarlen; - - return $len; -} - -sub replaced { - # Uses: - # $Global::noquote - # $Global::quoting - # Returns: - # $replaced = command with place holders replaced and prepended - my $self = shift; - if(not defined $self->{'replaced'}) { - # Don't quote arguments if the input is the full command line - my $quote_arg = $Global::noquote ? 0 : not $Global::quoting; - $self->{'replaced'} = $self->replace_placeholders($self->{'command'},$Global::quoting,$quote_arg); - my $len = length $self->{'replaced'}; - if ($len != $self->len()) { - ::debug("length", $len, " != ", $self->len(), " ", $self->{'replaced'}, "\n"); - } else { - ::debug("length", $len, " == ", $self->len(), " ", $self->{'replaced'}, "\n"); - } - } - return $self->{'replaced'}; -} - -sub replace_placeholders { - # Replace foo{}bar with fooargbar - # Input: - # $targetref = command as shell words - # $quote = should everything be quoted? - # $quote_arg = should replaced arguments be quoted? - # Returns: - # @target with placeholders replaced - my $self = shift; - my $targetref = shift; - my $quote = shift; - my $quote_arg = shift; - my $context_replace = $self->{'context_replace'}; - my @target = @$targetref; - ::debug("replace", "Replace @target\n"); - # -X = context replace - # maybe multiple input sources - # maybe --xapply - if(not @target) { - # @target is empty: Return empty array - return @target; - } - # Fish out the words that have replacement strings in them - my %word; - for (@target) { - my $tt = $_; - ::debug("replace", "Target: $tt"); - # a{1}b{}c{}d - # a{=1 $_=$_ =}b{= $_=$_ =}c{= $_=$_ =}d - # a\257<1 $_=$_ \257>b\257< $_=$_ \257>c\257< $_=$_ \257>d - # A B C => aAbA B CcA B Cd - # -X A B C => aAbAcAd aAbBcBd aAbCcCd - - if($context_replace) { - while($tt =~ s/([^\s\257]* # before {= - (?: - \257< # {= - [^\257]*? # The perl expression - \257> # =} - [^\s\257]* # after =} - )+)/ /x) { - # $1 = pre \257 perlexpr \257 post - $word{"$1"} ||= 1; - } - } else { - while($tt =~ s/( (?: \257<([^\257]*?)\257>) )//x) { - # $f = \257 perlexpr \257 - $word{$1} ||= 1; - } - } - } - my @word = keys %word; - - my %replace; - my @arg; - for my $record (@{$self->{'arg_list'}}) { - # $self->{'arg_list'} = [ [Arg11, Arg12], [Arg21, Arg22], [Arg31, Arg32] ] - # Merge arg-objects from records into @arg for easy access - CORE::push @arg, @$record; - } - # Add one arg if empty to allow {#} and {%} to be computed only once - if(not @arg) { @arg = (Arg->new("")); } - # Number of arguments - used for positional arguments - my $n = $#_+1; - - # This is actually a CommandLine-object, - # but it looks nice to be able to say {= $job->slot() =} - my $job = $self; - for my $word (@word) { - # word = AB \257< perlexpr \257> CD \257< perlexpr \257> EF - my $w = $word; - ::debug("replace", "Replacing in $w\n"); - - # Replace positional arguments - $w =~ s< ([^\s\257]*) # before {= - \257< # {= - (-?\d+) # Position (eg. -2 or 3) - ([^\257]*?) # The perl expression - \257> # =} - ([^\s\257]*) # after =} - > - { $1. # Context (pre) - ( - $arg[$2 > 0 ? $2-1 : $n+$2] ? # If defined: replace - $arg[$2 > 0 ? $2-1 : $n+$2]->replace($3,$quote_arg,$self) - : "") - .$4 }egx;# Context (post) - ::debug("replace", "Positional replaced $word with: $w\n"); - - if($w !~ /\257/) { - # No more replacement strings in $w: No need to do more - if($quote) { - CORE::push(@{$replace{::shell_quote($word)}}, $w); - } else { - CORE::push(@{$replace{$word}}, $w); - } - next; - } - # for each arg: - # compute replacement for each string - # replace replacement strings with replacement in the word value - # push to replace word value - ::debug("replace", "Positional done: $w\n"); - for my $arg (@arg) { - my $val = $w; - my $number_of_replacements = 0; - for my $perlexpr (keys %{$self->{'replacecount'}}) { - # Replace {= perl expr =} with value for each arg - $number_of_replacements += - $val =~ s{\257<\Q$perlexpr\E\257>} - {$arg ? $arg->replace($perlexpr,$quote_arg,$self) : ""}eg; - } - my $ww = $word; - if($quote) { - $ww = ::shell_quote_scalar($word); - $val = ::shell_quote_scalar($val); - } - if($number_of_replacements) { - CORE::push(@{$replace{$ww}}, $val); - } - } - } - - if($quote) { - @target = ::shell_quote(@target); - } - # ::debug("replace", "%replace=",::my_dump(%replace),"\n"); - if(%replace) { - # Substitute the replace strings with the replacement values - # Must be sorted by length if a short word is a substring of a long word - my $regexp = join('|', map { my $s = $_; $s =~ s/(\W)/\\$1/g; $s } - sort { length $b <=> length $a } keys %replace); - for(@target) { - s/($regexp)/join(" ",@{$replace{$1}})/ge; - } - } - ::debug("replace", "Return @target\n"); - return wantarray ? @target : "@target"; -} - - -package CommandLineQueue; - -sub new { - my $class = shift; - my $commandref = shift; - my $read_from = shift; - my $context_replace = shift; - my $max_number_of_args = shift; - my $return_files = shift; - my @unget = (); - my ($count,%replacecount,$posrpl,$perlexpr,%len); - my @command = @$commandref; - # If the first command start with '-' it is probably an option - if($command[0] =~ /^\s*(-\S+)/) { - # Is this really a command in $PATH starting with '-'? - my $cmd = $1; - if(not ::which($cmd)) { - ::error("Command ($cmd) starts with '-'. Is this a wrong option?\n"); - ::wait_and_exit(255); - } - } - # Replace replacement strings with {= perl expr =} - # Protect matching inside {= perl expr =} - # by replacing {= and =} with \257< and \257> - for(@command) { - if(/\257/) { - ::error("Command cannot contain the character \257. Use a function for that.\n"); - ::wait_and_exit(255); - } - s/\Q$Global::parensleft\E(.*?)\Q$Global::parensright\E/\257<$1\257>/gx; - } - for my $rpl (keys %Global::rpl) { - # Replace the short hand string with the {= perl expr =} in $command and $opt::tagstring - # Avoid replacing inside existing {= perl expr =} - for(@command,@Global::ret_files) { - while(s/((^|\257>)[^\257]*?) # Don't replace after \257 unless \257> - \Q$rpl\E/$1\257<$Global::rpl{$rpl}\257>/xg) { - } - } - if(defined $opt::tagstring) { - for($opt::tagstring) { - while(s/((^|\257>)[^\257]*?) # Don't replace after \257 unless \257> - \Q$rpl\E/$1\257<$Global::rpl{$rpl}\257>/x) {} - } - } - # Do the same for the positional replacement strings - # A bit harder as we have to put in the position number - $posrpl = $rpl; - if($posrpl =~ s/^\{//) { - # Only do this if the shorthand start with { - for(@command,@Global::ret_files) { - s/\{(-?\d+)\Q$posrpl\E/\257<$1 $Global::rpl{$rpl}\257>/g; - } - if(defined $opt::tagstring) { - $opt::tagstring =~ s/\{(-?\d+)\Q$posrpl\E/\257<$1 $perlexpr\257>/g; - } - } - } - my $sum = 0; - while($sum == 0) { - # Count how many times each replacement string is used - my @cmd = @command; - my $contextlen = 0; - my $noncontextlen = 0; - my $contextgroups = 0; - for my $c (@cmd) { - while($c =~ s/ \257<([^\257]*?)\257> /\000/x) { - # %replacecount = { "perlexpr" => number of times seen } - # e.g { "$_++" => 2 } - $replacecount{$1} ++; - $sum++; - } - # Measure the length of the context around the {= perl expr =} - # Use that {=...=} has been replaced with \000 above - # So there is no need to deal with \257< - while($c =~ s/ (\S*\000\S*) //x) { - my $w = $1; - $w =~ tr/\000//d; # Remove all \000's - $contextlen += length($w); - $contextgroups++; - } - # All {= perl expr =} have been removed: The rest is non-context - $noncontextlen += length $c; - } - if($opt::tagstring) { - my $t = $opt::tagstring; - while($t =~ s/ \257<([^\257]*)\257> //x) { - # %replacecount = { "perlexpr" => number of times seen } - # e.g { "$_++" => 2 } - # But for tagstring we just need to mark it as seen - $replacecount{$1}||=1; - } - } - - $len{'context'} = 0+$contextlen; - $len{'noncontext'} = $noncontextlen; - $len{'contextgroups'} = $contextgroups; - $len{'noncontextgroups'} = @cmd-$contextgroups; - ::debug("length", "@command Context: ", $len{'context'}, - " Non: ", $len{'noncontext'}, " Ctxgrp: ", $len{'contextgroups'}, - " NonCtxGrp: ", $len{'noncontextgroups'}, "\n"); - if($sum == 0) { - # Default command = {} - # If not replacement string: append {} - if(not @command) { - @command = ("\257<\257>"); - $Global::noquote = 1; - } elsif(($opt::pipe or $opt::pipepart) - and not $opt::fifo and not $opt::cat) { - # With --pipe / --pipe-part you can have no replacement - last; - } else { - # Append {} to the command if there are no {...}'s and no {=...=} - push @command, ("\257<\257>"); - } - } - } - - return bless { - 'unget' => \@unget, - 'command' => \@command, - 'replacecount' => \%replacecount, - 'arg_queue' => RecordQueue->new($read_from,$opt::colsep), - 'context_replace' => $context_replace, - 'len' => \%len, - 'max_number_of_args' => $max_number_of_args, - 'size' => undef, - 'return_files' => $return_files, - 'seq' => 1, - }, ref($class) || $class; -} - -sub get { - my $self = shift; - if(@{$self->{'unget'}}) { - my $cmd_line = shift @{$self->{'unget'}}; - return ($cmd_line); - } else { - my $cmd_line; - $cmd_line = CommandLine->new($self->seq(), - $self->{'command'}, - $self->{'arg_queue'}, - $self->{'context_replace'}, - $self->{'max_number_of_args'}, - $self->{'return_files'}, - $self->{'replacecount'}, - $self->{'len'}, - ); - $cmd_line->populate(); - ::debug("init","cmd_line->number_of_args ", - $cmd_line->number_of_args(), "\n"); - if($opt::pipe or $opt::pipepart) { - if($cmd_line->replaced() eq "") { - # Empty command - pipe requires a command - ::error("--pipe must have a command to pipe into (e.g. 'cat').\n"); - ::wait_and_exit(255); - } - } else { - if($cmd_line->number_of_args() == 0) { - # We did not get more args - maybe at EOF string? - return undef; - } elsif($cmd_line->replaced() eq "") { - # Empty command - get the next instead - return $self->get(); - } - } - $self->set_seq($self->seq()+1); - return $cmd_line; - } -} - -sub unget { - my $self = shift; - unshift @{$self->{'unget'}}, @_; -} - -sub empty { - my $self = shift; - my $empty = (not @{$self->{'unget'}}) && $self->{'arg_queue'}->empty(); - ::debug("run", "CommandLineQueue->empty $empty"); - return $empty; -} - -sub seq { - my $self = shift; - return $self->{'seq'}; -} - -sub set_seq { - my $self = shift; - $self->{'seq'} = shift; -} - -sub quote_args { - my $self = shift; - # If there is not command emulate |bash - return $self->{'command'}; -} - -sub size { - my $self = shift; - if(not $self->{'size'}) { - my @all_lines = (); - while(not $self->{'arg_queue'}->empty()) { - push @all_lines, CommandLine->new($self->{'command'}, - $self->{'arg_queue'}, - $self->{'context_replace'}, - $self->{'max_number_of_args'}); - } - $self->{'size'} = @all_lines; - $self->unget(@all_lines); - } - return $self->{'size'}; -} - - -package Limits::Command; - -# Maximal command line length (for -m and -X) -sub max_length { - # Find the max_length of a command line and cache it - # Returns: - # number of chars on the longest command line allowed - if(not $Limits::Command::line_max_len) { - # Disk cache of max command line length - my $len_cache = $ENV{'HOME'} . "/.parallel/tmp/linelen-" . ::hostname(); - my $cached_limit; - if(-e $len_cache) { - open(my $fh, "<", $len_cache) || ::die_bug("Cannot read $len_cache"); - $cached_limit = <$fh>; - close $fh; - } else { - $cached_limit = real_max_length(); - # If $HOME is write protected: Do not fail - mkdir($ENV{'HOME'} . "/.parallel"); - mkdir($ENV{'HOME'} . "/.parallel/tmp"); - open(my $fh, ">", $len_cache); - print $fh $cached_limit; - close $fh; - } - $Limits::Command::line_max_len = $cached_limit; - if($opt::max_chars) { - if($opt::max_chars <= $cached_limit) { - $Limits::Command::line_max_len = $opt::max_chars; - } else { - ::warning("Value for -s option ", - "should be < $cached_limit.\n"); - } - } - } - return $Limits::Command::line_max_len; -} - -sub real_max_length { - # Find the max_length of a command line - # Returns: - # The maximal command line length - # Use an upper bound of 8 MB if the shell allows for for infinite long lengths - my $upper = 8_000_000; - my $len = 8; - do { - if($len > $upper) { return $len }; - $len *= 16; - } while (is_acceptable_command_line_length($len)); - # Then search for the actual max length between 0 and upper bound - return binary_find_max_length(int($len/16),$len); -} - -sub binary_find_max_length { - # Given a lower and upper bound find the max_length of a command line - # Returns: - # number of chars on the longest command line allowed - my ($lower, $upper) = (@_); - if($lower == $upper or $lower == $upper-1) { return $lower; } - my $middle = int (($upper-$lower)/2 + $lower); - ::debug("init", "Maxlen: $lower,$upper,$middle : "); - if (is_acceptable_command_line_length($middle)) { - return binary_find_max_length($middle,$upper); - } else { - return binary_find_max_length($lower,$middle); - } -} - -sub is_acceptable_command_line_length { - # Test if a command line of this length can run - # Returns: - # 0 if the command line length is too long - # 1 otherwise - my $len = shift; - - local *STDERR; - open (STDERR, ">", "/dev/null"); - system "true "."x"x$len; - close STDERR; - ::debug("init", "$len=$? "); - return not $?; -} - - -package RecordQueue; - -sub new { - my $class = shift; - my $fhs = shift; - my $colsep = shift; - my @unget = (); - my $arg_sub_queue; - if($colsep) { - # Open one file with colsep - $arg_sub_queue = RecordColQueue->new($fhs); - } else { - # Open one or more files if multiple -a - $arg_sub_queue = MultifileQueue->new($fhs); - } - return bless { - 'unget' => \@unget, - 'arg_number' => 0, - 'arg_sub_queue' => $arg_sub_queue, - }, ref($class) || $class; -} - -sub get { - # Returns: - # reference to array of Arg-objects - my $self = shift; - if(@{$self->{'unget'}}) { - $self->{'arg_number'}++; - return shift @{$self->{'unget'}}; - } - my $ret = $self->{'arg_sub_queue'}->get(); - if(defined $Global::max_number_of_args - and $Global::max_number_of_args == 0) { - ::debug("run", "Read 1 but return 0 args\n"); - return [Arg->new("")]; - } else { - return $ret; - } -} - -sub unget { - my $self = shift; - ::debug("run", "RecordQueue-unget '@_'\n"); - $self->{'arg_number'} -= @_; - unshift @{$self->{'unget'}}, @_; -} - -sub empty { - my $self = shift; - my $empty = not @{$self->{'unget'}}; - $empty &&= $self->{'arg_sub_queue'}->empty(); - ::debug("run", "RecordQueue->empty $empty"); - return $empty; -} - -sub arg_number { - my $self = shift; - return $self->{'arg_number'}; -} - - -package RecordColQueue; - -sub new { - my $class = shift; - my $fhs = shift; - my @unget = (); - my $arg_sub_queue = MultifileQueue->new($fhs); - return bless { - 'unget' => \@unget, - 'arg_sub_queue' => $arg_sub_queue, - }, ref($class) || $class; -} - -sub get { - # Returns: - # reference to array of Arg-objects - my $self = shift; - if(@{$self->{'unget'}}) { - return shift @{$self->{'unget'}}; - } - my $unget_ref=$self->{'unget'}; - if($self->{'arg_sub_queue'}->empty()) { - return undef; - } - my $in_record = $self->{'arg_sub_queue'}->get(); - if(defined $in_record) { - my @out_record = (); - for my $arg (@$in_record) { - ::debug("run", "RecordColQueue::arg $arg\n"); - my $line = $arg->orig(); - ::debug("run", "line='$line'\n"); - if($line ne "") { - for my $s (split /$opt::colsep/o, $line, -1) { - push @out_record, Arg->new($s); - } - } else { - push @out_record, Arg->new(""); - } - } - return \@out_record; - } else { - return undef; - } -} - -sub unget { - my $self = shift; - ::debug("run", "RecordColQueue-unget '@_'\n"); - unshift @{$self->{'unget'}}, @_; -} - -sub empty { - my $self = shift; - my $empty = (not @{$self->{'unget'}} and $self->{'arg_sub_queue'}->empty()); - ::debug("run", "RecordColQueue->empty $empty"); - return $empty; -} - - -package MultifileQueue; - -@Global::unget_argv=(); - -sub new { - my $class = shift; - my $fhs = shift; - for my $fh (@$fhs) { - if(-t $fh) { - ::warning("Input is read from the terminal. ". - "Only experts do this on purpose. ". - "Press CTRL-D to exit.\n"); - } - } - return bless { - 'unget' => \@Global::unget_argv, - 'fhs' => $fhs, - 'arg_matrix' => undef, - }, ref($class) || $class; -} - -sub get { - my $self = shift; - if($opt::xapply) { - return $self->xapply_get(); - } else { - return $self->nest_get(); - } -} - -sub unget { - my $self = shift; - ::debug("run", "MultifileQueue-unget '@_'\n"); - unshift @{$self->{'unget'}}, @_; -} - -sub empty { - my $self = shift; - my $empty = (not @Global::unget_argv - and not @{$self->{'unget'}}); - for my $fh (@{$self->{'fhs'}}) { - $empty &&= eof($fh); - } - ::debug("run", "MultifileQueue->empty $empty "); - return $empty; -} - -sub xapply_get { - my $self = shift; - if(@{$self->{'unget'}}) { - return shift @{$self->{'unget'}}; - } - my @record = (); - my $prepend = undef; - my $empty = 1; - for my $fh (@{$self->{'fhs'}}) { - my $arg = read_arg_from_fh($fh); - if(defined $arg) { - # Record $arg for recycling at end of file - push @{$self->{'arg_matrix'}{$fh}}, $arg; - push @record, $arg; - $empty = 0; - } else { - ::debug("run", "EOA "); - # End of file: Recycle arguments - push @{$self->{'arg_matrix'}{$fh}}, shift @{$self->{'arg_matrix'}{$fh}}; - # return last @{$args->{'args'}{$fh}}; - push @record, @{$self->{'arg_matrix'}{$fh}}[-1]; - } - } - if($empty) { - return undef; - } else { - return \@record; - } -} - -sub nest_get { - my $self = shift; - if(@{$self->{'unget'}}) { - return shift @{$self->{'unget'}}; - } - my @record = (); - my $prepend = undef; - my $empty = 1; - my $no_of_inputsources = $#{$self->{'fhs'}} + 1; - if(not $self->{'arg_matrix'}) { - # Initialize @arg_matrix with one arg from each file - # read one line from each file - my @first_arg_set; - my $all_empty = 1; - for (my $fhno = 0; $fhno < $no_of_inputsources ; $fhno++) { - my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]); - if(defined $arg) { - $all_empty = 0; - } - $self->{'arg_matrix'}[$fhno][0] = $arg || Arg->new(""); - push @first_arg_set, $self->{'arg_matrix'}[$fhno][0]; - } - if($all_empty) { - # All filehandles were at eof or eof-string - return undef; - } - return [@first_arg_set]; - } - - # Treat the case with one input source special. For multiple - # input sources we need to remember all previously read values to - # generate all combinations. But for one input source we can - # forget the value after first use. - if($no_of_inputsources == 1) { - my $arg = read_arg_from_fh($self->{'fhs'}[0]); - if(defined($arg)) { - return [$arg]; - } - return undef; - } - for (my $fhno = $no_of_inputsources - 1; $fhno >= 0; $fhno--) { - if(eof($self->{'fhs'}[$fhno])) { - next; - } else { - # read one - my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]); - defined($arg) || next; # If we just read an EOF string: Treat this as EOF - my $len = $#{$self->{'arg_matrix'}[$fhno]} + 1; - $self->{'arg_matrix'}[$fhno][$len] = $arg; - # make all new combinations - my @combarg = (); - for (my $fhn = 0; $fhn < $no_of_inputsources; $fhn++) { - push @combarg, [0, $#{$self->{'arg_matrix'}[$fhn]}]; - } - $combarg[$fhno] = [$len,$len]; # Find only combinations with this new entry - # map combinations - # [ 1, 3, 7 ], [ 2, 4, 1 ] - # => - # [ m[0][1], m[1][3], m[3][7] ], [ m[0][2], m[1][4], m[2][1] ] - my @mapped; - for my $c (expand_combinations(@combarg)) { - my @a; - for my $n (0 .. $no_of_inputsources - 1 ) { - push @a, $self->{'arg_matrix'}[$n][$$c[$n]]; - } - push @mapped, \@a; - } - # append the mapped to the ungotten arguments - push @{$self->{'unget'}}, @mapped; - # get the first - return shift @{$self->{'unget'}}; - } - } - # all are eof or at EOF string; return from the unget queue - return shift @{$self->{'unget'}}; -} - -sub read_arg_from_fh { - # Read one Arg from filehandle - # Returns: - # Arg-object with one read line - # undef if end of file - my $fh = shift; - my $prepend = undef; - my $arg; - do {{ - # This makes 10% faster - if(not ($arg = <$fh>)) { - if(defined $prepend) { - return Arg->new($prepend); - } else { - return undef; - } - } -# ::debug("run", "read $arg\n"); - # Remove delimiter - $arg =~ s:$/$::; - if($Global::end_of_file_string and - $arg eq $Global::end_of_file_string) { - # Ignore the rest of input file - close $fh; - ::debug("run", "EOF-string ($arg) met\n"); - if(defined $prepend) { - return Arg->new($prepend); - } else { - return undef; - } - } - if(defined $prepend) { - $arg = $prepend.$arg; # For line continuation - $prepend = undef; #undef; - } - if($Global::ignore_empty) { - if($arg =~ /^\s*$/) { - redo; # Try the next line - } - } - if($Global::max_lines) { - if($arg =~ /\s$/) { - # Trailing space => continued on next line - $prepend = $arg; - redo; - } - } - }} while (1 == 0); # Dummy loop {{}} for redo - if(defined $arg) { - return Arg->new($arg); - } else { - ::die_bug("multiread arg undefined"); - } -} - -sub expand_combinations { - # Input: - # ([xmin,xmax], [ymin,ymax], ...) - # Returns: ([x,y,...],[x,y,...]) - # where xmin <= x <= xmax and ymin <= y <= ymax - my $minmax_ref = shift; - my $xmin = $$minmax_ref[0]; - my $xmax = $$minmax_ref[1]; - my @p; - if(@_) { - # If there are more columns: Compute those recursively - my @rest = expand_combinations(@_); - for(my $x = $xmin; $x <= $xmax; $x++) { - push @p, map { [$x, @$_] } @rest; - } - } else { - for(my $x = $xmin; $x <= $xmax; $x++) { - push @p, [$x]; - } - } - return @p; -} - - -package Arg; - -sub new { - my $class = shift; - my $orig = shift; - my @hostgroups; - if($opt::hostgroups) { - if($orig =~ s:@(.+)::) { - # We found hostgroups on the arg - @hostgroups = split(/\+/, $1); - if(not grep { defined $Global::hostgroups{$_} } @hostgroups) { - ::warning("No such hostgroup (@hostgroups)\n"); - @hostgroups = (keys %Global::hostgroups); - } - } else { - @hostgroups = (keys %Global::hostgroups); - } - } - return bless { - 'orig' => $orig, - 'hostgroups' => \@hostgroups, - }, ref($class) || $class; -} - -sub replace { - # Calculates the corresponding value for a given perl expression - # Returns: - # The calculated string (quoted if asked for) - my $self = shift; - my $perlexpr = shift; # E.g. $_=$_ or s/.gz// - my $quote = (shift) ? 1 : 0; # should the string be quoted? - # This is actually a CommandLine-object, - # but it looks nice to be able to say {= $job->slot() =} - my $job = shift; - $perlexpr =~ s/^-?\d+ //; # Positional replace treated as normal replace - if(not defined $self->{"rpl",0,$perlexpr}) { - local $_; - if($Global::trim eq "n") { - $_ = $self->{'orig'}; - } else { - $_ = trim_of($self->{'orig'}); - } - ::debug("replace", "eval ", $perlexpr, " ", $_, "\n"); - if(not $Global::perleval{$perlexpr}) { - # Make an anonymous function of the $perlexpr - # And more importantly: Compile it only once - if($Global::perleval{$perlexpr} = - eval('sub { no strict; no warnings; my $job = shift; '. - $perlexpr.' }')) { - # All is good - } else { - # The eval failed. Maybe $perlexpr is invalid perl? - ::error("Cannot use $perlexpr: $@\n"); - ::wait_and_exit(255); - } - } - # Execute the function - $Global::perleval{$perlexpr}->($job); - $self->{"rpl",0,$perlexpr} = $_; - } - if(not defined $self->{"rpl",$quote,$perlexpr}) { - $self->{"rpl",1,$perlexpr} = - ::shell_quote_scalar($self->{"rpl",0,$perlexpr}); - } - return $self->{"rpl",$quote,$perlexpr}; -} - -sub orig { - my $self = shift; - return $self->{'orig'}; -} - -sub trim_of { - # Removes white space as specifed by --trim: - # n = nothing - # l = start - # r = end - # lr|rl = both - # Returns: - # string with white space removed as needed - my @strings = map { defined $_ ? $_ : "" } (@_); - my $arg; - if($Global::trim eq "n") { - # skip - } elsif($Global::trim eq "l") { - for my $arg (@strings) { $arg =~ s/^\s+//; } - } elsif($Global::trim eq "r") { - for my $arg (@strings) { $arg =~ s/\s+$//; } - } elsif($Global::trim eq "rl" or $Global::trim eq "lr") { - for my $arg (@strings) { $arg =~ s/^\s+//; $arg =~ s/\s+$//; } - } else { - ::error("--trim must be one of: r l rl lr.\n"); - ::wait_and_exit(255); - } - return wantarray ? @strings : "@strings"; -} - - -package TimeoutQueue; - -sub new { - my $class = shift; - my $delta_time = shift; - my ($pct); - if($delta_time =~ /(\d+(\.\d+)?)%/) { - # Timeout in percent - $pct = $1/100; - $delta_time = 1_000_000; - } - return bless { - 'queue' => [], - 'delta_time' => $delta_time, - 'pct' => $pct, - 'remedian_idx' => 0, - 'remedian_arr' => [], - 'remedian' => undef, - }, ref($class) || $class; -} - -sub delta_time { - my $self = shift; - return $self->{'delta_time'}; -} - -sub set_delta_time { - my $self = shift; - $self->{'delta_time'} = shift; -} - -sub remedian { - my $self = shift; - return $self->{'remedian'}; -} - -sub set_remedian { - # Set median of the last 999^3 (=997002999) values using Remedian - # - # Rousseeuw, Peter J., and Gilbert W. Bassett Jr. "The remedian: A - # robust averaging method for large data sets." Journal of the - # American Statistical Association 85.409 (1990): 97-104. - my $self = shift; - my $val = shift; - my $i = $self->{'remedian_idx'}++; - my $rref = $self->{'remedian_arr'}; - $rref->[0][$i%999] = $val; - $rref->[1][$i/999%999] = (sort @{$rref->[0]})[$#{$rref->[0]}/2]; - $rref->[2][$i/999/999%999] = (sort @{$rref->[1]})[$#{$rref->[1]}/2]; - $self->{'remedian'} = (sort @{$rref->[2]})[$#{$rref->[2]}/2]; -} - -sub update_delta_time { - # Update delta_time based on runtime of finished job if timeout is - # a percentage - my $self = shift; - my $runtime = shift; - if($self->{'pct'}) { - $self->set_remedian($runtime); - $self->{'delta_time'} = $self->{'pct'} * $self->remedian(); - ::debug("run", "Timeout: $self->{'delta_time'}s "); - } -} - -sub process_timeouts { - # Check if there was a timeout - my $self = shift; - # $self->{'queue'} is sorted by start time - while (@{$self->{'queue'}}) { - my $job = $self->{'queue'}[0]; - if($job->endtime()) { - # Job already finished. No need to timeout the job - # This could be because of --keep-order - shift @{$self->{'queue'}}; - } elsif($job->timedout($self->{'delta_time'})) { - # Need to shift off queue before kill - # because kill calls usleep that calls process_timeouts - shift @{$self->{'queue'}}; - $job->kill(); - } else { - # Because they are sorted by start time the rest are later - last; - } - } -} - -sub insert { - my $self = shift; - my $in = shift; - push @{$self->{'queue'}}, $in; -} - - -package Semaphore; - -# This package provides a counting semaphore -# -# If a process dies without releasing the semaphore the next process -# that needs that entry will clean up dead semaphores -# -# The semaphores are stored in ~/.parallel/semaphores/id- Each -# file in ~/.parallel/semaphores/id-/ is the process ID of the -# process holding the entry. If the process dies, the entry can be -# taken by another process. - -sub new { - my $class = shift; - my $id = shift; - my $count = shift; - $id=~s/([^-_a-z0-9])/unpack("H*",$1)/ige; # Convert non-word chars to hex - $id="id-".$id; # To distinguish it from a process id - my $parallel_dir = $ENV{'HOME'}."/.parallel"; - -d $parallel_dir or mkdir_or_die($parallel_dir); - my $parallel_locks = $parallel_dir."/semaphores"; - -d $parallel_locks or mkdir_or_die($parallel_locks); - my $lockdir = "$parallel_locks/$id"; - my $lockfile = $lockdir.".lock"; - if($count < 1) { ::die_bug("semaphore-count: $count"); } - return bless { - 'lockfile' => $lockfile, - 'lockfh' => Symbol::gensym(), - 'lockdir' => $lockdir, - 'id' => $id, - 'idfile' => $lockdir."/".$id, - 'pid' => $$, - 'pidfile' => $lockdir."/".$$.'@'.::hostname(), - 'count' => $count + 1 # nlinks returns a link for the 'id-' as well - }, ref($class) || $class; -} - -sub acquire { - my $self = shift; - my $sleep = 1; # 1 ms - my $start_time = time; - while(1) { - $self->atomic_link_if_count_less_than() and last; - ::debug("sem", "Remove dead locks"); - my $lockdir = $self->{'lockdir'}; - for my $d (glob "$lockdir/*") { - ::debug("sem", "Lock $d $lockdir\n"); - $d =~ m:$lockdir/([0-9]+)\@([-\._a-z0-9]+)$:o or next; - my ($pid, $host) = ($1, $2); - if($host eq ::hostname()) { - if(not kill 0, $1) { - ::debug("sem", "Dead: $d"); - unlink $d; - } else { - ::debug("sem", "Alive: $d"); - } - } - } - # try again - $self->atomic_link_if_count_less_than() and last; - # Retry slower and slower up to 1 second - $sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep); - # Random to avoid every sleeping job waking up at the same time - ::usleep(rand()*$sleep); - if(defined($opt::timeout) and - $start_time + $opt::timeout > time) { - # Acquire the lock anyway - if(not -e $self->{'idfile'}) { - open (my $fh, ">", $self->{'idfile'}) or - ::die_bug("timeout_write_idfile: $self->{'idfile'}"); - close $fh; - } - link $self->{'idfile'}, $self->{'pidfile'}; - last; - } - } - ::debug("sem", "acquired $self->{'pid'}\n"); -} - -sub release { - my $self = shift; - unlink $self->{'pidfile'}; - if($self->nlinks() == 1) { - # This is the last link, so atomic cleanup - $self->lock(); - if($self->nlinks() == 1) { - unlink $self->{'idfile'}; - rmdir $self->{'lockdir'}; - } - $self->unlock(); - } - ::debug("run", "released $self->{'pid'}\n"); -} - -sub _release { - my $self = shift; - - unlink $self->{'pidfile'}; - $self->lock(); - my $nlinks = $self->nlinks(); - ::debug("sem", $nlinks, "<", $self->{'count'}); - if($nlinks-- > 1) { - unlink $self->{'idfile'}; - open (my $fh, ">", $self->{'idfile'}) or - ::die_bug("write_idfile: $self->{'idfile'}"); - print $fh "#"x$nlinks; - close $fh; - } else { - unlink $self->{'idfile'}; - rmdir $self->{'lockdir'}; - } - $self->unlock(); - ::debug("sem", "released $self->{'pid'}\n"); -} - -sub atomic_link_if_count_less_than { - # Link $file1 to $file2 if nlinks to $file1 < $count - my $self = shift; - my $retval = 0; - $self->lock(); - ::debug($self->nlinks(), "<", $self->{'count'}); - if($self->nlinks() < $self->{'count'}) { - -d $self->{'lockdir'} or mkdir_or_die($self->{'lockdir'}); - if(not -e $self->{'idfile'}) { - open (my $fh, ">", $self->{'idfile'}) or - ::die_bug("write_idfile: $self->{'idfile'}"); - close $fh; - } - $retval = link $self->{'idfile'}, $self->{'pidfile'}; - } - $self->unlock(); - ::debug("run", "atomic $retval"); - return $retval; -} - -sub _atomic_link_if_count_less_than { - # Link $file1 to $file2 if nlinks to $file1 < $count - my $self = shift; - my $retval = 0; - $self->lock(); - my $nlinks = $self->nlinks(); - ::debug("sem", $nlinks, "<", $self->{'count'}); - if($nlinks++ < $self->{'count'}) { - -d $self->{'lockdir'} or mkdir_or_die($self->{'lockdir'}); - if(not -e $self->{'idfile'}) { - open (my $fh, ">", $self->{'idfile'}) or - ::die_bug("write_idfile: $self->{'idfile'}"); - close $fh; - } - open (my $fh, ">", $self->{'idfile'}) or - ::die_bug("write_idfile: $self->{'idfile'}"); - print $fh "#"x$nlinks; - close $fh; - $retval = link $self->{'idfile'}, $self->{'pidfile'}; - } - $self->unlock(); - ::debug("sem", "atomic $retval"); - return $retval; -} - -sub nlinks { - my $self = shift; - if(-e $self->{'idfile'}) { - ::debug("sem", "nlinks", (stat(_))[3], "size", (stat(_))[7], "\n"); - return (stat(_))[3]; - } else { - return 0; - } -} - -sub lock { - my $self = shift; - my $sleep = 100; # 100 ms - my $total_sleep = 0; - $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;"; - my $locked = 0; - while(not $locked) { - if(tell($self->{'lockfh'}) == -1) { - # File not open - open($self->{'lockfh'}, ">", $self->{'lockfile'}) - or ::debug("run", "Cannot open $self->{'lockfile'}"); - } - if($self->{'lockfh'}) { - # File is open - chmod 0666, $self->{'lockfile'}; # assuming you want it a+rw - if(flock($self->{'lockfh'}, LOCK_EX()|LOCK_NB())) { - # The file is locked: No need to retry - $locked = 1; - last; - } else { - if ($! =~ m/Function not implemented/) { - ::warning("flock: $!"); - ::warning("Will wait for a random while\n"); - ::usleep(rand(5000)); - # File cannot be locked: No need to retry - $locked = 2; - last; - } - } - } - # Locking failed in first round - # Sleep and try again - $sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep); - # Random to avoid every sleeping job waking up at the same time - ::usleep(rand()*$sleep); - $total_sleep += $sleep; - if($opt::semaphoretimeout) { - if($total_sleep/1000 > $opt::semaphoretimeout) { - # Timeout: bail out - ::warning("Semaphore timed out. Ignoring timeout."); - $locked = 3; - last; - } - } else { - if($total_sleep/1000 > 30) { - ::warning("Semaphore stuck for 30 seconds. Consider using --semaphoretimeout."); - } - } - } - ::debug("run", "locked $self->{'lockfile'}"); -} - -sub unlock { - my $self = shift; - unlink $self->{'lockfile'}; - close $self->{'lockfh'}; - ::debug("run", "unlocked\n"); -} - -sub mkdir_or_die { - # If dir is not writable: die - my $dir = shift; - my @dir_parts = split(m:/:,$dir); - my ($ddir,$part); - while(defined ($part = shift @dir_parts)) { - $part eq "" and next; - $ddir .= "/".$part; - -d $ddir and next; - mkdir $ddir; - } - if(not -w $dir) { - ::error("Cannot write to $dir: $!\n"); - ::wait_and_exit(255); - } -} - -# Keep perl -w happy -$opt::x = $Semaphore::timeout = $Semaphore::wait = -$Job::file_descriptor_warning_printed = 0;