1 //---------------------------------------------------------------------------//
2 // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
4 // Distributed under the Boost Software License, Version 1.0
5 // See accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt
8 // See http://boostorg.github.com/compute for more information.
9 //---------------------------------------------------------------------------//
16 #include <boost/program_options.hpp>
18 #include <boost/compute/system.hpp>
19 #include <boost/compute/algorithm/accumulate.hpp>
20 #include <boost/compute/container/vector.hpp>
24 namespace po
= boost::program_options
;
25 namespace compute
= boost::compute
;
// Body of the random-value helper; presumably `rand_int`, the generator that
// main() hands to std::generate (the signature is not visible in this listing).
// rand() / double(RAND_MAX) is a double in [0, 1]; multiplying by 25.0 and
// converting to int truncates, so the result is an int in [0, 25].
29 return static_cast<int>((rand() / double(RAND_MAX
)) * 25.0);
// Benchmark driver for compute::accumulate.
//
// data  - device vector whose full range [begin, end) is accumulated.
// queue - command queue the accumulate call is issued on.
//
// The loop below is bounded by `trials`, and both callers in this file pass
// (data, trials, queue); the parameter declaration itself is not visible in
// this listing. The timing code and the return statement are not visible
// either. NOTE(review): main() prints the returned double divided by 1e6 with
// an "ms" label, so the value is presumably in nanoseconds - confirm.
33 double perf_accumulate(const compute::vector
<T
>& data
,
35 compute::command_queue
& queue
)
// One accumulate call over the whole vector per trial, starting from T(0).
38 for(size_t trial
= 0; trial
< trials
; trial
++){
40 compute::accumulate(data
.begin(), data
.end(), T(0), queue
);
// Tuning procedure: tries every (tpb, vpt) pair from the tables below, times
// perf_accumulate() with each pair written to the device's parameter cache,
// and finally writes best_tpb / best_vpt back to the cache.
//
// data  - device vector passed through to perf_accumulate().
// queue - command queue; its device selects the parameter cache that is used.
//
// `trials` is forwarded to perf_accumulate(), but its declaration is not
// visible in this listing. The try block and the code that updates min_time,
// best_tpb and best_vpt are also not visible here.
// NOTE(review): "tpb" / "vpt" presumably mean threads-per-block and
// values-per-thread - confirm against the kernel that reads them.
48 void tune_accumulate(const compute::vector
<T
>& data
,
50 compute::command_queue
& queue
)
// Global parameter cache for the device behind `queue`.
52 boost::shared_ptr
<compute::detail::parameter_cache
>
53 params
= compute::detail::parameter_cache::get_global_cache(queue
.get_device());
// Cache key is the "__boost_reduce_on_gpu_" prefix followed by the type name
// of T, so parameters are stored per element type.
55 const std::string cache_key
=
56 std::string("__boost_reduce_on_gpu_") + compute::type_name
<T
>();
// Candidate values for the two tuned parameters.
58 const compute::uint_ tpbs
[] = { 4, 8, 16, 32, 64, 128, 256, 512, 1024 };
59 const compute::uint_ vpts
[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
// Start from the largest double so that any measured time can replace it.
// The extra parentheses around numeric_limits<double>::max keep a
// function-like max() macro, if one is defined, from being expanded.
61 double min_time
= (std::numeric_limits
<double>::max
)();
62 compute::uint_ best_tpb
= 0;
63 compute::uint_ best_vpt
= 0;
// Outer loop over tpb candidates; sizeof(array) / sizeof(*array) is the
// element count of the table.
65 for(size_t i
= 0; i
< sizeof(tpbs
) / sizeof(*tpbs
); i
++){
66 params
->set(cache_key
, "tpb", tpbs
[i
]);
// Inner loop over vpt candidates for the current tpb.
67 for(size_t j
= 0; j
< sizeof(vpts
) / sizeof(*vpts
); j
++){
68 params
->set(cache_key
, "vpt", vpts
[j
]);
// Time the benchmark with the pair that was just written to the cache.
71 const double t
= perf_accumulate(data
, trials
, queue
);
78 catch(compute::opencl_error
&){
79 // invalid parameters for this device, skip
84 // store optimal parameters
85 params
->set(cache_key
, "tpb", best_tpb
);
86 params
->set(cache_key
, "vpt", best_vpt
);
// Program entry point.
//
// Command-line options: "help", "size" (size_t, default 8192, also accepted
// as the first positional argument), "trials" (size_t, default 3) and "tune".
// Prints the input size and the device name, fills a host vector with
// rand_int values, copies it to a device vector, optionally runs
// tune_accumulate(), then runs perf_accumulate() and prints the result
// divided by 1e6 with an "ms" label.
//
// Several lines of the original are not visible in this listing (for example
// the handling of "help" and the closing of the device-vector constructor).
89 int main(int argc
, char *argv
[])
91 // setup command line arguments
92 po::options_description
options("options");
94 ("help", "show usage instructions")
95 ("size", po::value
<size_t>()->default_value(8192), "input size")
96 ("trials", po::value
<size_t>()->default_value(3), "number of trials to run")
97 ("tune", "run tuning procedure")
// Let a bare first argument be interpreted as "size".
99 po::positional_options_description positional_options
;
100 positional_options
.add("size", 1);
102 // parse command line
103 po::variables_map vm
;
105 po::command_line_parser(argc
, argv
)
106 .options(options
).positional(positional_options
).run(),
111 const size_t size
= vm
["size"].as
<size_t>();
112 const size_t trials
= vm
["trials"].as
<size_t>();
113 std::cout
<< "size: " << size
<< std::endl
;
115 // setup context and queue for the default device
116 compute::device device
= compute::system::default_device();
117 compute::context
context(device
);
118 compute::command_queue
queue(context
, device
);
119 std::cout
<< "device: " << device
.name() << std::endl
;
121 // create vector of random numbers on the host
122 std::vector
<int> host_data(size
);
123 std::generate(host_data
.begin(), host_data
.end(), rand_int
);
125 // create vector on the device and copy the data
126 compute::vector
<int> device_data(
127 host_data
.begin(), host_data
.end(), queue
130 // run tuning procedure (if requested)
131 if(vm
.count("tune")){
132 tune_accumulate(device_data
, trials
, queue
);
// Run the benchmark and report it; the raw value is divided by 1e6 before
// being labelled "ms".
136 double t
= perf_accumulate(device_data
, trials
, queue
);
137 std::cout
<< "time: " << t
/ 1e6
<< " ms" << std::endl
;