]> git.proxmox.com Git - ceph.git/blob - ceph/src/boost/libs/compute/include/boost/compute/algorithm/detail/count_if_with_threads.hpp
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / boost / libs / compute / include / boost / compute / algorithm / detail / count_if_with_threads.hpp
1 //---------------------------------------------------------------------------//
2 // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
3 //
4 // Distributed under the Boost Software License, Version 1.0
5 // See accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt
7 //
8 // See http://boostorg.github.com/compute for more information.
9 //---------------------------------------------------------------------------//
10
11 #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_THREADS_HPP
12 #define BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_THREADS_HPP
13
14 #include <numeric>
15
16 #include <boost/compute/detail/meta_kernel.hpp>
17 #include <boost/compute/container/vector.hpp>
18
19 namespace boost {
20 namespace compute {
21 namespace detail {
22
23 template<class InputIterator, class Predicate>
24 class count_if_with_threads_kernel : meta_kernel
25 {
26 public:
27 typedef typename
28 std::iterator_traits<InputIterator>::value_type
29 value_type;
30
31 count_if_with_threads_kernel()
32 : meta_kernel("count_if_with_threads")
33 {
34 }
35
36 void set_args(InputIterator first,
37 InputIterator last,
38 Predicate predicate)
39
40 {
41 typedef typename std::iterator_traits<InputIterator>::value_type T;
42
43 m_size = detail::iterator_range_size(first, last);
44
45 m_size_arg = add_arg<const ulong_>("size");
46 m_counts_arg = add_arg<ulong_ *>(memory_object::global_memory, "counts");
47
48 *this <<
49 // thread parameters
50 "const uint gid = get_global_id(0);\n" <<
51 "const uint block_size = size / get_global_size(0);\n" <<
52 "const uint start = block_size * gid;\n" <<
53 "uint end = 0;\n" <<
54 "if(gid == get_global_size(0) - 1)\n" <<
55 " end = size;\n" <<
56 "else\n" <<
57 " end = block_size * gid + block_size;\n" <<
58
59 // count values
60 "uint count = 0;\n" <<
61 "for(uint i = start; i < end; i++){\n" <<
62 decl<const T>("value") << "="
63 << first[expr<uint_>("i")] << ";\n" <<
64 if_(predicate(var<const T>("value"))) << "{\n" <<
65 "count++;\n" <<
66 "}\n" <<
67 "}\n" <<
68
69 // write count
70 "counts[gid] = count;\n";
71 }
72
73 size_t exec(command_queue &queue)
74 {
75 const device &device = queue.get_device();
76 const context &context = queue.get_context();
77
78 size_t threads = device.compute_units();
79
80 const size_t minimum_block_size = 2048;
81 if(m_size / threads < minimum_block_size){
82 threads = static_cast<size_t>(
83 (std::max)(
84 std::ceil(float(m_size) / minimum_block_size),
85 1.0f
86 )
87 );
88 }
89
90 // storage for counts
91 ::boost::compute::vector<ulong_> counts(threads, context);
92
93 // exec kernel
94 set_arg(m_size_arg, static_cast<ulong_>(m_size));
95 set_arg(m_counts_arg, counts.get_buffer());
96 exec_1d(queue, 0, threads, 1);
97
98 // copy counts to the host
99 std::vector<ulong_> host_counts(threads);
100 ::boost::compute::copy(counts.begin(), counts.end(), host_counts.begin(), queue);
101
102 // return sum of counts
103 return std::accumulate(host_counts.begin(), host_counts.end(), size_t(0));
104 }
105
106 private:
107 size_t m_size;
108 size_t m_size_arg;
109 size_t m_counts_arg;
110 };
111
112 // counts values that match the predicate using one thread per block. this is
113 // optimized for cpu-type devices with a small number of compute units.
114 template<class InputIterator, class Predicate>
115 inline size_t count_if_with_threads(InputIterator first,
116 InputIterator last,
117 Predicate predicate,
118 command_queue &queue)
119 {
120 count_if_with_threads_kernel<InputIterator, Predicate> kernel;
121 kernel.set_args(first, last, predicate);
122 return kernel.exec(queue);
123 }
124
125 } // end detail namespace
126 } // end compute namespace
127 } // end boost namespace
128
129 #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_THREADS_HPP