]> git.proxmox.com Git - ceph.git/blame - ceph/src/boost/libs/compute/include/boost/compute/algorithm/detail/count_if_with_threads.hpp
bump version to 12.2.2-pve1
[ceph.git] / ceph / src / boost / libs / compute / include / boost / compute / algorithm / detail / count_if_with_threads.hpp
CommitLineData
7c673cae
FG
//---------------------------------------------------------------------------//
// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
10
11#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_THREADS_HPP
12#define BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_THREADS_HPP
13
14#include <numeric>
15
16#include <boost/compute/detail/meta_kernel.hpp>
17#include <boost/compute/container/vector.hpp>
18
19namespace boost {
20namespace compute {
21namespace detail {
22
23template<class InputIterator, class Predicate>
24class count_if_with_threads_kernel : meta_kernel
25{
26public:
27 typedef typename
28 std::iterator_traits<InputIterator>::value_type
29 value_type;
30
31 count_if_with_threads_kernel()
32 : meta_kernel("count_if_with_threads")
33 {
34 }
35
36 void set_args(InputIterator first,
37 InputIterator last,
38 Predicate predicate)
39
40 {
41 typedef typename std::iterator_traits<InputIterator>::value_type T;
42
43 m_size = detail::iterator_range_size(first, last);
44
45 m_size_arg = add_arg<const ulong_>("size");
46 m_counts_arg = add_arg<ulong_ *>(memory_object::global_memory, "counts");
47
48 *this <<
49 // thread parameters
50 "const uint gid = get_global_id(0);\n" <<
51 "const uint block_size = size / get_global_size(0);\n" <<
52 "const uint start = block_size * gid;\n" <<
53 "uint end = 0;\n" <<
54 "if(gid == get_global_size(0) - 1)\n" <<
55 " end = size;\n" <<
56 "else\n" <<
57 " end = block_size * gid + block_size;\n" <<
58
59 // count values
60 "uint count = 0;\n" <<
61 "for(uint i = start; i < end; i++){\n" <<
62 decl<const T>("value") << "="
63 << first[expr<uint_>("i")] << ";\n" <<
64 if_(predicate(var<const T>("value"))) << "{\n" <<
65 "count++;\n" <<
66 "}\n" <<
67 "}\n" <<
68
69 // write count
70 "counts[gid] = count;\n";
71 }
72
73 size_t exec(command_queue &queue)
74 {
75 const device &device = queue.get_device();
76 const context &context = queue.get_context();
77
78 size_t threads = device.compute_units();
79
80 const size_t minimum_block_size = 2048;
81 if(m_size / threads < minimum_block_size){
82 threads = static_cast<size_t>(
83 (std::max)(
84 std::ceil(float(m_size) / minimum_block_size),
85 1.0f
86 )
87 );
88 }
89
90 // storage for counts
91 ::boost::compute::vector<ulong_> counts(threads, context);
92
93 // exec kernel
94 set_arg(m_size_arg, static_cast<ulong_>(m_size));
95 set_arg(m_counts_arg, counts.get_buffer());
96 exec_1d(queue, 0, threads, 1);
97
98 // copy counts to the host
99 std::vector<ulong_> host_counts(threads);
100 ::boost::compute::copy(counts.begin(), counts.end(), host_counts.begin(), queue);
101
102 // return sum of counts
103 return std::accumulate(host_counts.begin(), host_counts.end(), size_t(0));
104 }
105
106private:
107 size_t m_size;
108 size_t m_size_arg;
109 size_t m_counts_arg;
110};
111
112// counts values that match the predicate using one thread per block. this is
113// optimized for cpu-type devices with a small number of compute units.
114template<class InputIterator, class Predicate>
115inline size_t count_if_with_threads(InputIterator first,
116 InputIterator last,
117 Predicate predicate,
118 command_queue &queue)
119{
120 count_if_with_threads_kernel<InputIterator, Predicate> kernel;
121 kernel.set_args(first, last, predicate);
122 return kernel.exec(queue);
123}
124
125} // end detail namespace
126} // end compute namespace
127} // end boost namespace
128
129#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_THREADS_HPP