-#!/usr/bin/perl -w
-# Copyright Steven J. Ross 2008 - 2014
-# Distributed under the Boost Software License, Version 1.0.
-# (See accompanying file LICENSE_1_0.txt or copy at
-# http://www.boost.org/LICENSE_1_0.txt)
-#
-# See http://www.boost.org/libs/sort for library home page.
-
-# A speed and accuracy testing and automated parameter tuning script.
-
-$usage = "usage: tune.pl [-tune] [-real] [-tune_verify] [-verbose] [-multiple_iterations] [-large] [-small] [-windows] [fileSize]\n";
-# testing sorting on 40 million elements by default
-# don't test on below 2^22 (4 million) elements as that is the minimum
-# for max_splits of 11 to be efficient
-use File::Compare;
-$defFileSize = 5000000;
-$loopCount = 1;
-$realtimes = 0;
-$verifycorrect = 0;
-$verbose = 0;
-$exename = "spreadsort";
-$makename = "b2 \-\-tune";
-$all = "";
-$iter_count = 1;
-$debug = 0;
-$log = "> .tunelog";
-$log2 = "> .tunelog 2>&1";
-$diffopt = "-q";
-$tune = 0;
-# have to change the path for UNIX
-$prev_path = $ENV{'PATH'};
-$ENV{'PATH'} = '.:'.$prev_path;
-
-for (my $ii = 0; $ii < @ARGV; $ii++) {
- my $currArg = $ARGV[$ii];
- if ($currArg =~ /^-help$/) {
- print STDERR $usage;
- exit(0);
- }
- # verification roughly doubles the runtime of this script,
- # but it does make sure that results are correct during tuning
- # verification always runs during speed comparisons with std::sort
- if ($currArg =~ /^-tune_verify$/) {
- $verifycorrect = 1;
- # use real times only, don't use weighting and special-case tests
- # this saves about 5/6 of the script runtime but results are different
- } elsif ($currArg =~ /^-real$/) {
- $realtimes = 1;
- } elsif ($currArg =~ /^-verbose$/) {
- $verbose = 1;
- # runs until we converge on a precise set of values
- # defaults off because of runtime
- } elsif ($currArg =~ /^-multiple_iterations$/) {
- $iter_count = 4;
- } elsif ($currArg =~ /^-debug$/) {
- $debug = 1;
- $log = "";
- $diffopt = "";
- } elsif ($currArg =~ /^-large$/) {
- $defFileSize = 20000000;
- } elsif ($currArg =~ /^-small$/) {
- $defFileSize = 100000;
- } elsif ($currArg =~ /^-tune$/) {
- $tune = 1;
- } elsif ($currArg =~ /^-windows$/) {
- $makename = "..\\..\\".$makename;
- } elsif ($currArg =~ /^-/) {
- print STDERR $usage;
- exit(0);
- } else {
- $defFileSize = $currArg;
- }
-}
-$fileSize = $defFileSize;
-
-print STDOUT "Tuning variables for $exename on vectors with $defFileSize elements\n";
-
-# these are reasonable values
-$max_splits = 11;
-$log_finishing_count = 31;
-$log_min_size = 11;
-$log_mean_bin_size = 2;
-$float_log_min_size = 10;
-$float_log_mean_bin_size = 2;
-$float_log_finishing_count = 4;
-
-# this value is a minimum to obtain decent file I/O performance
-$min_sort_size = 1000;
-$std = "";
-
-print STDOUT "building randomgen\n";
-system("$makename randomgen $log");
-# Tuning to get convergence, maximum of 4 iterations with multiple iterations
-# option set
-$changed = 1;
-my $ii = 0;
-if ($tune) {
- for ($ii = 0; $changed and $ii < $iter_count; $ii++) {
- $changed = 0;
- # Tuning max_splits is not recommended.
- #print STDOUT "Tuning max_splits\n";
- #TuneVariable(\$max_splits, $log_min_size - $log_mean_bin_size, 17);
- print STDOUT "Tuning log of the minimum count for recursion\n";
- TuneVariable(\$log_min_size, $log_mean_bin_size + 1, $max_splits + $log_mean_bin_size);
- print STDOUT "Tuning log_mean_bin_size\n";
- TuneVariable(\$log_mean_bin_size, 0, $log_min_size - 1);
- print STDOUT "Tuning log_finishing_size\n";
- TuneVariable(\$log_finishing_count, 1, $log_min_size);
- # tuning variables for floats
- $exename = "floatsort";
- print STDOUT "Tuning log of the minimum count for recursion for floats\n";
- TuneVariable(\$float_log_min_size, $float_log_mean_bin_size + 1, $max_splits + $float_log_mean_bin_size);
- print STDOUT "Tuning float_log_mean_bin_size\n";
- TuneVariable(\$float_log_mean_bin_size, 0, $float_log_min_size - 1);
- print STDOUT "Tuning float_log_finishing_size\n";
- TuneVariable(\$float_log_finishing_count, 1, $float_log_min_size);
- $exename = "spreadsort";
- }
-
- # After optimizations for large datasets are complete, see how small of a
- # dataset can be sped up
- print STDOUT "Tuning minimum sorting size\n";
- TuneMinSize();
- print STDOUT "Writing results\n";
-}
-
-# Doing a final run with final settings to compare sort times
-# also verifying correctness of results
-$verifycorrect = 1;
-$loopCount = 1;
-$fileSize = $defFileSize;
-system("$makename $all $log");
-$std = "";
-PerfTest("Verifying integer_sort", "spreadsort");
-PerfTest("Verifying float_sort", "floatsort");
-PerfTest("Verifying string_sort", "stringsort");
-PerfTest("Verifying integer_sort with mostly-sorted data", "mostlysorted");
-PerfTest("Timing integer_sort on already-sorted data", "alreadysorted");
-PerfTest("Verifying integer_sort with rightshift", "rightshift");
-PerfTest("Verifying integer_sort with 64-bit integers", "int64");
-PerfTest("Verifying integer_sort with separate key and data", "keyplusdata");
-PerfTest("Verifying reverse integer_sort", "reverseintsort");
-PerfTest("Verifying float_sort with doubles", "double");
-PerfTest("Verifying float_sort with shift functor", "shiftfloatsort");
-PerfTest("Verifying float_sort with functors", "floatfunctorsort");
-PerfTest("Verifying string_sort with indexing functors", "charstringsort");
-PerfTest("Verifying string_sort with all functors", "stringfunctorsort");
-PerfTest("Verifying reverse_string_sort", "reversestringsort");
-PerfTest("Verifying reverse_string_sort with functors",
- "reversestringfunctorsort");
-PerfTest("Verifying generalized string_sort with multiple keys of different types",
- "generalizedstruct");
-PerfTest("Verifying boost::sort on its custom-built worst-case distribution",
- "binaryalrbreaker");
-# clean up once we finish
-system("$makename clean $log");
-# WINDOWS
-system("del spread_sort_out.txt $log2");
-system("del standard_sort_out.txt $log2");
-system("del input.txt $log2");
-system("del *.rsp $log2");
-system("del *.manifest $log2");
-system("del time.txt $log2");
-# UNIX
-system("rm -f time.txt $log2");
-system("rm -f spread_sort_out.txt $log2");
-system("rm -f standard_sort_out.txt $log2");
-system("rm -f input.txt $log2");
-
-$ENV{'PATH'} = $prev_path;
-
-# A simple speed test comparing std::sort to
-sub PerfTest {
- my ($message, $local_exe) = @_;
- $exename = $local_exe;
- print STDOUT "$message\n";
- $lastTime = SumTimes();
- print STDOUT "runtime: $lastTime\n";
- print STDOUT "std::sort time: $baseTime\n";
- $speedup = (($baseTime/$lastTime) - 1) * 100;
- print STDOUT "speedup: ".sprintf("%.2f", $speedup)."%\n";
-}
-
-# Write an updated constants file as part of tuning.
-sub WriteConstants {
- # deleting the file
- $const_file = 'include/boost/sort/spreadsort/detail/constants.hpp';
- @cannot = grep {not unlink} $const_file;
- print "$0: could not unlink @cannot\n" if @cannot;
-
- # writing the results back to the original file name
- unless(open(CONSTANTS, ">$const_file")) {
- print STDERR "Can't open output file: $const_file: $!\n";
- exit;
- }
- print CONSTANTS "//constant definitions for the Boost Sort library\n\n";
- print CONSTANTS "// Copyright Steven J. Ross 2001 - 2014\n";
- print CONSTANTS "// Distributed under the Boost Software License, Version 1.0.\n";
- print CONSTANTS "// (See accompanying file LICENSE_1_0.txt or copy at\n";
- print CONSTANTS "// http://www.boost.org/LICENSE_1_0.txt)\n\n";
- print CONSTANTS "// See http://www.boost.org/libs/sort for library home page.\n";
- print CONSTANTS "#ifndef BOOST_SORT_SPREADSORT_DETAIL_CONSTANTS\n";
- print CONSTANTS "#define BOOST_SORT_SPREADSORT_DETAIL_CONSTANTS\n";
- print CONSTANTS "namespace boost {\n";
- print CONSTANTS "namespace sort {\n";
- print CONSTANTS "namespace spreadsort {\n";
- print CONSTANTS "namespace detail {\n";
- print CONSTANTS "//Tuning constants\n";
- print CONSTANTS "//This should be tuned to your processor cache;\n";
- print CONSTANTS "//if you go too large you get cache misses on bins\n";
- print CONSTANTS "//The smaller this number, the less worst-case memory usage.\n";
- print CONSTANTS "//If too small, too many recursions slow down spreadsort\n";
- print CONSTANTS "enum { max_splits = $max_splits,\n";
- print CONSTANTS "//It's better to have a few cache misses and finish sorting\n";
- print CONSTANTS "//than to run another iteration\n";
- print CONSTANTS "max_finishing_splits = max_splits + 1,\n";
- print CONSTANTS "//Sets the minimum number of items per bin.\n";
- print CONSTANTS "int_log_mean_bin_size = $log_mean_bin_size,\n";
- print CONSTANTS "//Used to force a comparison-based sorting for small bins, if it's faster.\n";
- print CONSTANTS "//Minimum value 1\n";
- $log_min_split_count = $log_min_size - $log_mean_bin_size;
- print CONSTANTS "int_log_min_split_count = $log_min_split_count,\n";
- print CONSTANTS "//This is the minimum split count to use spreadsort when it will finish in one\n";
- print CONSTANTS "//iteration. Make this larger the faster std::sort is relative to integer_sort.\n";
- print CONSTANTS "int_log_finishing_count = $log_finishing_count,\n";
- print CONSTANTS "//Sets the minimum number of items per bin for floating point.\n";
- print CONSTANTS "float_log_mean_bin_size = $float_log_mean_bin_size,\n";
- print CONSTANTS "//Used to force a comparison-based sorting for small bins, if it's faster.\n";
- print CONSTANTS "//Minimum value 1\n";
- $float_log_min_split_count = $float_log_min_size - $float_log_mean_bin_size;
- print CONSTANTS "float_log_min_split_count = $float_log_min_split_count,\n";
- print CONSTANTS "//This is the minimum split count to use spreadsort when it will finish in one\n";
- print CONSTANTS "//iteration. Make this larger the faster std::sort is relative to float_sort.\n";
- print CONSTANTS "float_log_finishing_count = $float_log_finishing_count,\n";
- print CONSTANTS "//There is a minimum size below which it is not worth using spreadsort\n";
- print CONSTANTS "min_sort_size = $min_sort_size };\n";
- print CONSTANTS "}\n}\n}\n}\n#endif\n";
- close CONSTANTS;
- system("$makename $exename $log");
-}
-
-# Sort the file with both std::sort and boost::sort, verify the results are the
-# same, update stdtime with std::sort time, and return boost::sort time.
-sub CheckTime {
- my $sort_time = 0.0;
- my $time_file = "time.txt";
- # use the line below on systems that can't overwrite.
- #system("rm -f $time_file");
- system("$exename $loopCount $std > $time_file");
- unless(open(CODE, $time_file)) {
- print STDERR "Could not open file: $time_file: $!\n";
- exit;
- }
- while ($line = <CODE>) {
- @parts = split("time", $line);
- if (@parts > 1) {
- $sort_time = $parts[1];
- last;
- }
- }
- close(CODE);
- # verifying correctness
- if (not $std and $verifycorrect) {
- system("$exename $loopCount -std > $time_file");
- unless(open(CODE, $time_file)) {
- print STDERR "Could not open file: $time_file: $!\n";
- exit;
- }
- die "Difference in results\n" unless (compare("boost_sort_out.txt","standard_sort_out.txt") == 0) ;
- while ($line = <CODE>) {
- @parts = split("time", $line);
- if (@parts > 1) {
- $stdsingle = $parts[1];
- last;
- }
- }
- close(CODE);
- }
- return $sort_time;
-}
-
-# Sum up times for different data distributions. If realtimes is not set,
-# larger ranges are given a larger weight.
-sub SumTimes {
- my $time = 0;
- $baseTime = 0.0;
- $stdsingle = 0.0;
- my $ii = 1;
- # if we're only using real times, don't bother with the corner-cases
- if ($realtimes) {
- $ii = 8;
- }
- for (; $ii <= 16; $ii++) {
- system("randomgen $ii $ii $fileSize");
- if ($realtimes) {
- $time += CheckTime();
- $baseTime += $stdsingle;
- } else {
- # tests with higher levels of randomness are given
- # higher priority in timing results
- print STDOUT "trying $ii $ii\n" if $debug;
- $time += 2 * $ii * CheckTime();
- $baseTime += 2 * $ii * $stdsingle;
- if ($ii > 1) {
- print STDOUT "trying 1 $ii\n" if $debug;
- system("randomgen 1 $ii $fileSize");
- $time += $ii * CheckTime();
- $baseTime += $ii * $stdsingle;
- print STDOUT "trying $ii 1\n" if $debug;
- system("randomgen $ii 1 $fileSize");
- $time += $ii * CheckTime();
- $baseTime += $ii * $stdsingle;
- }
- }
- }
- if ($time == 0.0) {
- $time = 0.01;
- }
- return $time;
-}
-
-# Tests a range of potential values for a variable, and sets it to the fastest.
-sub TuneVariable {
- my ($tunevar, $beginval, $endval) = @_;
- my $best_val = $$tunevar;
- my $besttime = 0;
- my $startval = $$tunevar;
- for ($$tunevar = $beginval; $$tunevar <= $endval; $$tunevar++) {
- WriteConstants();
- $sumtime = SumTimes();
- # If this value is better, use it. If this is the start value
- # and it's just as good, use the startval
- if (not $besttime or ($sumtime < $besttime) or (($besttime == $sumtime) and ($$tunevar == $startval))) {
- $besttime = $sumtime;
- $best_val = $$tunevar;
- }
- print STDOUT "Value: $$tunevar Time: $sumtime\n" if $verbose;
- }
- $$tunevar = $best_val;
- print STDOUT "Best Value: $best_val\n";
- if ($best_val != $startval) {
- $changed = 1;
- }
-}
-
-# Determine the cutoff size below which std::sort is faster.
-sub TuneMinSize {
- for (; $min_sort_size <= $defFileSize; $min_sort_size *= 2) {
- $loopCount = ($defFileSize/$min_sort_size)/10;
- $fileSize = $min_sort_size;
- WriteConstants();
- $std = "";
- $sumtime = SumTimes();
- $std = "-std";
- $stdtime = SumTimes();
- print STDOUT "Size: $min_sort_size boost::sort Time: $sumtime std::sort Time: $stdtime\n";
- last if ($stdtime > $sumtime);
- }
-}
+#!/usr/bin/perl -w\r
+# Copyright Steven J. Ross 2008 - 2014\r
+# Distributed under the Boost Software License, Version 1.0.\r
+# (See accompanying file LICENSE_1_0.txt or copy at\r
+# http://www.boost.org/LICENSE_1_0.txt)\r
+#\r
+# See http://www.boost.org/libs/sort for library home page.\r
+\r
+# A speed and accuracy testing and automated parameter tuning script.\r
+\r
+$usage = "usage: tune.pl [-tune] [-real] [-tune_verify] [-verbose] [-multiple_iterations] [-large] [-small] [-windows] [fileSize]\n";\r
+# testing sorting on 40 million elements by default\r
+# don't test on below 2^22 (4 million) elements as that is the minimum\r
+# for max_splits of 11 to be efficient\r
+use File::Compare;\r
+$defFileSize = 5000000;\r
+$loopCount = 1;\r
+$realtimes = 0;\r
+$verifycorrect = 0;\r
+$verbose = 0;\r
+$exename = "spreadsort";\r
+$makename = "../../b2 \-\-tune";\r
+$all = "";\r
+$iter_count = 1;\r
+$debug = 0;\r
+$log = "> .tunelog";\r
+$log2 = "> .tunelog 2>&1";\r
+$diffopt = "-q";\r
+$tune = 0;\r
+# have to change the path for UNIX\r
+$prev_path = $ENV{'PATH'}; \r
+$ENV{'PATH'} = '.:'.$prev_path;\r
+\r
+for (my $ii = 0; $ii < @ARGV; $ii++) {\r
+ my $currArg = $ARGV[$ii];\r
+ if ($currArg =~ /^-help$/) {\r
+ print STDERR $usage;\r
+ exit(0);\r
+ }\r
+ # verification roughly doubles the runtime of this script,\r
+ # but it does make sure that results are correct during tuning\r
+ # verification always runs during speed comparisons with std::sort\r
+ if ($currArg =~ /^-tune_verify$/) {\r
+ $verifycorrect = 1;\r
+ # use real times only, don't use weighting and special-case tests\r
+ # this saves about 5/6 of the script runtime but results are different\r
+ } elsif ($currArg =~ /^-real$/) {\r
+ $realtimes = 1;\r
+ } elsif ($currArg =~ /^-verbose$/) {\r
+ $verbose = 1;\r
+ # runs until we converge on a precise set of values\r
+ # defaults off because of runtime\r
+ } elsif ($currArg =~ /^-multiple_iterations$/) {\r
+ $iter_count = 4;\r
+ } elsif ($currArg =~ /^-debug$/) {\r
+ $debug = 1;\r
+ $log = "";\r
+ $diffopt = "";\r
+ } elsif ($currArg =~ /^-large$/) {\r
+ $defFileSize = 20000000;\r
+ } elsif ($currArg =~ /^-small$/) {\r
+ $defFileSize = 100000;\r
+ } elsif ($currArg =~ /^-tune$/) {\r
+ $tune = 1;\r
+ } elsif ($currArg =~ /^-windows$/) {\r
+ $makename = "..\\..\\".$makename;\r
+ } elsif ($currArg =~ /^-/) {\r
+ print STDERR $usage;\r
+ exit(0);\r
+ } else {\r
+ $defFileSize = $currArg;\r
+ }\r
+}\r
+$fileSize = $defFileSize;\r
+\r
+print STDOUT "Tuning variables for $exename on vectors with $defFileSize elements\n";\r
+\r
+# these are reasonable values\r
+$max_splits = 11;\r
+$log_finishing_count = 31;\r
+$log_min_size = 11;\r
+$log_mean_bin_size = 2;\r
+$float_log_min_size = 10;\r
+$float_log_mean_bin_size = 2;\r
+$float_log_finishing_count = 4;\r
+\r
+# this value is a minimum to obtain decent file I/O performance\r
+$min_sort_size = 1000;\r
+$std = "";\r
+\r
+print STDOUT "building randomgen\n";\r
+system("$makename randomgen $log");\r
+# Tuning to get convergence, maximum of 4 iterations with multiple iterations\r
+# option set\r
+$changed = 1;\r
+my $ii = 0;\r
+if ($tune) {\r
+ for ($ii = 0; $changed and $ii < $iter_count; $ii++) {\r
+ $changed = 0;\r
+ # Tuning max_splits is not recommended.\r
+ #print STDOUT "Tuning max_splits\n";\r
+ #TuneVariable(\$max_splits, $log_min_size - $log_mean_bin_size, 17);\r
+ print STDOUT "Tuning log of the minimum count for recursion\n";\r
+ TuneVariable(\$log_min_size, $log_mean_bin_size + 1, $max_splits + $log_mean_bin_size);\r
+ print STDOUT "Tuning log_mean_bin_size\n";\r
+ TuneVariable(\$log_mean_bin_size, 0, $log_min_size - 1);\r
+ print STDOUT "Tuning log_finishing_size\n";\r
+ TuneVariable(\$log_finishing_count, 1, $log_min_size);\r
+ # tuning variables for floats\r
+ $exename = "floatsort";\r
+ print STDOUT "Tuning log of the minimum count for recursion for floats\n";\r
+ TuneVariable(\$float_log_min_size, $float_log_mean_bin_size + 1, $max_splits + $float_log_mean_bin_size);\r
+ print STDOUT "Tuning float_log_mean_bin_size\n";\r
+ TuneVariable(\$float_log_mean_bin_size, 0, $float_log_min_size - 1);\r
+ print STDOUT "Tuning float_log_finishing_size\n";\r
+ TuneVariable(\$float_log_finishing_count, 1, $float_log_min_size);\r
+ $exename = "spreadsort";\r
+ }\r
+\r
+ # After optimizations for large datasets are complete, see how small of a \r
+ # dataset can be sped up\r
+ print STDOUT "Tuning minimum sorting size\n";\r
+ TuneMinSize();\r
+ print STDOUT "Writing results\n";\r
+}\r
+\r
+# Doing a final run with final settings to compare sort times\r
+# also verifying correctness of results\r
+$verifycorrect = 1;\r
+$loopCount = 1;\r
+$fileSize = $defFileSize;\r
+system("$makename $all $log");\r
+$std = "";\r
+PerfTest("Verifying integer_sort", "spreadsort");\r
+PerfTest("Verifying float_sort", "floatsort");\r
+PerfTest("Verifying string_sort", "stringsort");\r
+PerfTest("Verifying integer_sort with mostly-sorted data", "mostlysorted");\r
+PerfTest("Timing integer_sort on already-sorted data", "alreadysorted");\r
+PerfTest("Verifying integer_sort with rightshift", "rightshift");\r
+PerfTest("Verifying integer_sort with 64-bit integers", "int64");\r
+PerfTest("Verifying integer_sort with separate key and data", "keyplusdata");\r
+PerfTest("Verifying reverse integer_sort", "reverseintsort");\r
+PerfTest("Verifying float_sort with doubles", "double");\r
+PerfTest("Verifying float_sort with shift functor", "shiftfloatsort");\r
+PerfTest("Verifying float_sort with functors", "floatfunctorsort");\r
+PerfTest("Verifying string_sort with indexing functors", "charstringsort");\r
+PerfTest("Verifying string_sort with all functors", "stringfunctorsort");\r
+PerfTest("Verifying reverse_string_sort", "reversestringsort");\r
+PerfTest("Verifying reverse_string_sort with functors",\r
+ "reversestringfunctorsort");\r
+PerfTest("Verifying generalized string_sort with multiple keys of different types",\r
+ "generalizedstruct");\r
+PerfTest("Verifying boost::sort on its custom-built worst-case distribution",\r
+ "binaryalrbreaker");\r
+# clean up once we finish\r
+system("$makename clean $log");\r
+# WINDOWS\r
+system("del spread_sort_out.txt $log2");\r
+system("del standard_sort_out.txt $log2");\r
+system("del input.txt $log2");\r
+system("del *.rsp $log2");\r
+system("del *.manifest $log2");\r
+system("del time.txt $log2");\r
+# UNIX\r
+system("rm -f time.txt $log2");\r
+system("rm -f spread_sort_out.txt $log2");\r
+system("rm -f standard_sort_out.txt $log2");\r
+system("rm -f input.txt $log2");\r
+\r
+$ENV{'PATH'} = $prev_path;\r
+\r
+# A simple speed test comparing std::sort to \r
+sub PerfTest {\r
+ my ($message, $local_exe) = @_;\r
+ $exename = $local_exe;\r
+ print STDOUT "$message\n";\r
+ $lastTime = SumTimes();\r
+ print STDOUT "runtime: $lastTime\n";\r
+ print STDOUT "std::sort time: $baseTime\n";\r
+ $speedup = (($baseTime/$lastTime) - 1) * 100;\r
+ print STDOUT "speedup: ".sprintf("%.2f", $speedup)."%\n";\r
+}\r
+\r
+# Write an updated constants file as part of tuning.\r
+sub WriteConstants {\r
+ # deleting the file\r
+ $const_file = 'include/boost/sort/spreadsort/detail/constants.hpp';\r
+ @cannot = grep {not unlink} $const_file;\r
+ print "$0: could not unlink @cannot\n" if @cannot;\r
+\r
+ # writing the results back to the original file name\r
+ unless(open(CONSTANTS, ">$const_file")) {\r
+ print STDERR "Can't open output file: $const_file: $!\n";\r
+ exit;\r
+ }\r
+ print CONSTANTS "//constant definitions for the Boost Sort library\n\n";\r
+ print CONSTANTS "// Copyright Steven J. Ross 2001 - 2014\n";\r
+ print CONSTANTS "// Distributed under the Boost Software License, Version 1.0.\n";\r
+ print CONSTANTS "// (See accompanying file LICENSE_1_0.txt or copy at\n";\r
+ print CONSTANTS "// http://www.boost.org/LICENSE_1_0.txt)\n\n";\r
+ print CONSTANTS "// See http://www.boost.org/libs/sort for library home page.\n";\r
+ print CONSTANTS "#ifndef BOOST_SORT_SPREADSORT_DETAIL_CONSTANTS\n";\r
+ print CONSTANTS "#define BOOST_SORT_SPREADSORT_DETAIL_CONSTANTS\n";\r
+ print CONSTANTS "namespace boost {\n";\r
+ print CONSTANTS "namespace sort {\n";\r
+ print CONSTANTS "namespace spreadsort {\n";\r
+ print CONSTANTS "namespace detail {\n";\r
+ print CONSTANTS "//Tuning constants\n";\r
+ print CONSTANTS "//This should be tuned to your processor cache;\n"; \r
+ print CONSTANTS "//if you go too large you get cache misses on bins\n";\r
+ print CONSTANTS "//The smaller this number, the less worst-case memory usage.\n"; \r
+ print CONSTANTS "//If too small, too many recursions slow down spreadsort\n";\r
+ print CONSTANTS "enum { max_splits = $max_splits,\n";\r
+ print CONSTANTS "//It's better to have a few cache misses and finish sorting\n";\r
+ print CONSTANTS "//than to run another iteration\n";\r
+ print CONSTANTS "max_finishing_splits = max_splits + 1,\n";\r
+ print CONSTANTS "//Sets the minimum number of items per bin.\n";\r
+ print CONSTANTS "int_log_mean_bin_size = $log_mean_bin_size,\n";\r
+ print CONSTANTS "//Used to force a comparison-based sorting for small bins, if it's faster.\n";\r
+ print CONSTANTS "//Minimum value 1\n";\r
+ $log_min_split_count = $log_min_size - $log_mean_bin_size;\r
+ print CONSTANTS "int_log_min_split_count = $log_min_split_count,\n";\r
+ print CONSTANTS "//This is the minimum split count to use spreadsort when it will finish in one\n";\r
+ print CONSTANTS "//iteration. Make this larger the faster std::sort is relative to integer_sort.\n";\r
+ print CONSTANTS "int_log_finishing_count = $log_finishing_count,\n";\r
+ print CONSTANTS "//Sets the minimum number of items per bin for floating point.\n";\r
+ print CONSTANTS "float_log_mean_bin_size = $float_log_mean_bin_size,\n";\r
+ print CONSTANTS "//Used to force a comparison-based sorting for small bins, if it's faster.\n";\r
+ print CONSTANTS "//Minimum value 1\n";\r
+ $float_log_min_split_count = $float_log_min_size - $float_log_mean_bin_size;\r
+ print CONSTANTS "float_log_min_split_count = $float_log_min_split_count,\n";\r
+ print CONSTANTS "//This is the minimum split count to use spreadsort when it will finish in one\n";\r
+ print CONSTANTS "//iteration. Make this larger the faster std::sort is relative to float_sort.\n";\r
+ print CONSTANTS "float_log_finishing_count = $float_log_finishing_count,\n";\r
+ print CONSTANTS "//There is a minimum size below which it is not worth using spreadsort\n";\r
+ print CONSTANTS "min_sort_size = $min_sort_size };\n";\r
+ print CONSTANTS "}\n}\n}\n}\n#endif\n";\r
+ close CONSTANTS;\r
+ system("$makename $exename $log");\r
+}\r
+\r
+# Sort the file with both std::sort and boost::sort, verify the results are the\r
+# same, update stdtime with std::sort time, and return boost::sort time.\r
+sub CheckTime {\r
+ my $sort_time = 0.0;\r
+ my $time_file = "time.txt";\r
+ # use the line below on systems that can't overwrite.\r
+ #system("rm -f $time_file");\r
+ system("$exename $loopCount $std > $time_file");\r
+ unless(open(CODE, $time_file)) {\r
+ print STDERR "Could not open file: $time_file: $!\n";\r
+ exit;\r
+ }\r
+ while ($line = <CODE>) {\r
+ @parts = split("time", $line);\r
+ if (@parts > 1) {\r
+ $sort_time = $parts[1];\r
+ last;\r
+ } \r
+ }\r
+ close(CODE);\r
+ # verifying correctness\r
+ if (not $std and $verifycorrect) {\r
+ system("$exename $loopCount -std > $time_file");\r
+ unless(open(CODE, $time_file)) {\r
+ print STDERR "Could not open file: $time_file: $!\n";\r
+ exit;\r
+ }\r
+ die "Difference in results\n" unless (compare("boost_sort_out.txt","standard_sort_out.txt") == 0) ;\r
+ while ($line = <CODE>) {\r
+ @parts = split("time", $line);\r
+ if (@parts > 1) {\r
+ $stdsingle = $parts[1];\r
+ last;\r
+ } \r
+ }\r
+ close(CODE);\r
+ }\r
+ return $sort_time;\r
+}\r
+\r
+# Sum up times for different data distributions. If realtimes is not set, \r
+# larger ranges are given a larger weight.\r
+sub SumTimes {\r
+ my $time = 0;\r
+ $baseTime = 0.0;\r
+ $stdsingle = 0.0;\r
+ my $ii = 1;\r
+ # if we're only using real times, don't bother with the corner-cases\r
+ if ($realtimes) {\r
+ $ii = 8;\r
+ }\r
+ for (; $ii <= 16; $ii++) {\r
+ system("randomgen $ii $ii $fileSize");\r
+ if ($realtimes) {\r
+ $time += CheckTime();\r
+ $baseTime += $stdsingle;\r
+ } else {\r
+ # tests with higher levels of randomness are given \r
+ # higher priority in timing results\r
+ print STDOUT "trying $ii $ii\n" if $debug;\r
+ $time += 2 * $ii * CheckTime();\r
+ $baseTime += 2 * $ii * $stdsingle;\r
+ if ($ii > 1) {\r
+ print STDOUT "trying 1 $ii\n" if $debug;\r
+ system("randomgen 1 $ii $fileSize");\r
+ $time += $ii * CheckTime();\r
+ $baseTime += $ii * $stdsingle;\r
+ print STDOUT "trying $ii 1\n" if $debug;\r
+ system("randomgen $ii 1 $fileSize");\r
+ $time += $ii * CheckTime();\r
+ $baseTime += $ii * $stdsingle;\r
+ }\r
+ }\r
+ }\r
+ if ($time == 0.0) {\r
+ $time = 0.01;\r
+ }\r
+ return $time;\r
+}\r
+\r
+# Tests a range of potential values for a variable, and sets it to the fastest.\r
+sub TuneVariable {\r
+ my ($tunevar, $beginval, $endval) = @_;\r
+ my $best_val = $$tunevar;\r
+ my $besttime = 0;\r
+ my $startval = $$tunevar;\r
+ for ($$tunevar = $beginval; $$tunevar <= $endval; $$tunevar++) {\r
+ WriteConstants();\r
+ $sumtime = SumTimes();\r
+ # If this value is better, use it. If this is the start value\r
+ # and it's just as good, use the startval\r
+ if (not $besttime or ($sumtime < $besttime) or (($besttime == $sumtime) and ($$tunevar == $startval))) {\r
+ $besttime = $sumtime;\r
+ $best_val = $$tunevar;\r
+ }\r
+ print STDOUT "Value: $$tunevar Time: $sumtime\n" if $verbose;\r
+ }\r
+ $$tunevar = $best_val;\r
+ print STDOUT "Best Value: $best_val\n";\r
+ if ($best_val != $startval) {\r
+ $changed = 1;\r
+ }\r
+}\r
+\r
+# Determine the cutoff size below which std::sort is faster.\r
+sub TuneMinSize {\r
+ for (; $min_sort_size <= $defFileSize; $min_sort_size *= 2) {\r
+ $loopCount = ($defFileSize/$min_sort_size)/10;\r
+ $fileSize = $min_sort_size;\r
+ WriteConstants();\r
+ $std = "";\r
+ $sumtime = SumTimes();\r
+ $std = "-std";\r
+ $stdtime = SumTimes();\r
+ print STDOUT "Size: $min_sort_size boost::sort Time: $sumtime std::sort Time: $stdtime\n";\r
+ last if ($stdtime > $sumtime);\r
+ }\r
+}\r