1 //---------------------------------------------------------------------------//
2 // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
4 // Distributed under the Boost Software License, Version 1.0
5 // See accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt
8 // See http://boostorg.github.com/compute for more information.
9 //---------------------------------------------------------------------------//
11 #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_THREADS_HPP
12 #define BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_THREADS_HPP
16 #include <boost/compute/detail/meta_kernel.hpp>
17 #include <boost/compute/container/vector.hpp>
// Meta-kernel that counts the values in an input range matching a predicate.
// Each work-item scans one contiguous block of the input and stores its
// partial count in counts[gid]; exec() then sums the partial counts on the
// host. meta_kernel is inherited privately (no access specifier is given).
//
// Template parameters:
//   InputIterator - iterator type of the range to scan
//   Predicate     - callable used to build the per-value test in the kernel
23 template<class InputIterator, class Predicate>
24 class count_if_with_threads_kernel : meta_kernel
28 std::iterator_traits<InputIterator>::value_type
// Names the generated kernel "count_if_with_threads".
31 count_if_with_threads_kernel()
32 : meta_kernel("count_if_with_threads")
// Records the input size, declares the kernel arguments and streams the
// kernel source into this meta_kernel.
//
// Kernel arguments declared here:
//   "size"   - const ulong_
//   "counts" - ulong_ pointer in global memory, written at index gid
//
// NOTE(review): "size" is ulong_ but gid/block_size/start/i/count are all
// declared `uint` in the kernel source, so values beyond the 32-bit range
// would presumably be truncated - confirm whether inputs that large are
// expected.
36 void set_args(InputIterator first,
41 typedef typename std::iterator_traits<InputIterator>::value_type T;
43 m_size = detail::iterator_range_size(first, last);
45 m_size_arg = add_arg<const ulong_>("size");
46 m_counts_arg = add_arg<ulong_ *>(memory_object::global_memory, "counts");
// per-work-item block: block_size = size / global size, start = block_size * gid
50 "const uint gid = get_global_id(0);\n" <<
51 "const uint block_size = size / get_global_size(0);\n" <<
52 "const uint start = block_size * gid;\n" <<
// the last work-item is special-cased; the others stop at start + block_size
// (presumably the last one absorbs the remainder of the integer division -
// TODO confirm against the full kernel source)
54 "if(gid == get_global_size(0) - 1)\n" <<
57 " end = block_size * gid + block_size;\n" <<
// walk [start, end): load each element into "value" and test the predicate
60 "uint count = 0;\n" <<
61 "for(uint i = start; i < end; i++){\n" <<
62 decl<const T>("value") << "="
63 << first[expr<uint_>("i")] << ";\n" <<
64 if_(predicate(var<const T>("value"))) << "{\n" <<
// publish this work-item's partial count
70 "counts[gid] = count;\n";
// Runs the kernel on `queue` and returns the sum of the per-work-item counts.
//
// The number of work-items starts at the device's compute-unit count. When
// that would give each work-item fewer than minimum_block_size (2048)
// elements, it is recomputed from ceil(m_size / minimum_block_size).
//
// NOTE(review): m_size / threads assumes compute_units() is non-zero -
// confirm.
73 size_t exec(command_queue &queue)
75 const device &device = queue.get_device();
76 const context &context = queue.get_context();
78 size_t threads = device.compute_units();
80 const size_t minimum_block_size = 2048;
81 if(m_size / threads < minimum_block_size){
82 threads = static_cast<size_t>(
84 std::ceil(float(m_size) / minimum_block_size),
// device-side buffer holding one partial count per work-item
91 ::boost::compute::vector<ulong_> counts(threads, context);
94 set_arg(m_size_arg, static_cast<ulong_>(m_size));
95 set_arg(m_counts_arg, counts.get_buffer());
// presumably (offset 0, global size = threads, local size = 1) - TODO confirm
// against meta_kernel::exec_1d
96 exec_1d(queue, 0, threads, 1);
98 // copy counts to the host
99 std::vector<ulong_> host_counts(threads);
100 ::boost::compute::copy(counts.begin(), counts.end(), host_counts.begin(), queue);
102 // return sum of counts
103 return std::accumulate(host_counts.begin(), host_counts.end(), size_t(0));
112 // counts values that match the predicate using one thread per block. this is
113 // optimized for cpu-type devices with a small number of compute units.
// Convenience wrapper: default-constructs a count_if_with_threads_kernel,
// configures it with (first, last, predicate) via set_args(), runs it on
// `queue` and returns the value produced by the kernel's exec().
//
//   first, last - input range handed to the kernel
//   predicate   - test handed to the kernel
//   queue       - command queue the kernel is executed on
114 template<class InputIterator, class Predicate>
115 inline size_t count_if_with_threads(InputIterator first,
118 command_queue &queue)
120 count_if_with_threads_kernel<InputIterator, Predicate> kernel;
121 kernel.set_args(first, last, predicate);
122 return kernel.exec(queue);
125 } // end detail namespace
126 } // end compute namespace
127 } // end boost namespace
129 #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_THREADS_HPP