]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | //---------------------------------------------------------------------------// |
2 | // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> | |
3 | // | |
4 | // Distributed under the Boost Software License, Version 1.0 | |
5 | // See accompanying file LICENSE_1_0.txt or copy at | |
6 | // http://www.boost.org/LICENSE_1_0.txt | |
7 | // | |
8 | // See http://boostorg.github.com/compute for more information. | |
9 | //---------------------------------------------------------------------------// | |
10 | ||
11 | #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_THREADS_HPP | |
12 | #define BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_THREADS_HPP | |
13 | ||
14 | #include <numeric> | |
15 | ||
16 | #include <boost/compute/detail/meta_kernel.hpp> | |
17 | #include <boost/compute/container/vector.hpp> | |
18 | ||
19 | namespace boost { | |
20 | namespace compute { | |
21 | namespace detail { | |
22 | ||
23 | template<class InputIterator, class Predicate> | |
24 | class count_if_with_threads_kernel : meta_kernel | |
25 | { | |
26 | public: | |
27 | typedef typename | |
28 | std::iterator_traits<InputIterator>::value_type | |
29 | value_type; | |
30 | ||
31 | count_if_with_threads_kernel() | |
32 | : meta_kernel("count_if_with_threads") | |
33 | { | |
34 | } | |
35 | ||
36 | void set_args(InputIterator first, | |
37 | InputIterator last, | |
38 | Predicate predicate) | |
39 | ||
40 | { | |
41 | typedef typename std::iterator_traits<InputIterator>::value_type T; | |
42 | ||
43 | m_size = detail::iterator_range_size(first, last); | |
44 | ||
45 | m_size_arg = add_arg<const ulong_>("size"); | |
46 | m_counts_arg = add_arg<ulong_ *>(memory_object::global_memory, "counts"); | |
47 | ||
48 | *this << | |
49 | // thread parameters | |
50 | "const uint gid = get_global_id(0);\n" << | |
51 | "const uint block_size = size / get_global_size(0);\n" << | |
52 | "const uint start = block_size * gid;\n" << | |
53 | "uint end = 0;\n" << | |
54 | "if(gid == get_global_size(0) - 1)\n" << | |
55 | " end = size;\n" << | |
56 | "else\n" << | |
57 | " end = block_size * gid + block_size;\n" << | |
58 | ||
59 | // count values | |
60 | "uint count = 0;\n" << | |
61 | "for(uint i = start; i < end; i++){\n" << | |
62 | decl<const T>("value") << "=" | |
63 | << first[expr<uint_>("i")] << ";\n" << | |
64 | if_(predicate(var<const T>("value"))) << "{\n" << | |
65 | "count++;\n" << | |
66 | "}\n" << | |
67 | "}\n" << | |
68 | ||
69 | // write count | |
70 | "counts[gid] = count;\n"; | |
71 | } | |
72 | ||
73 | size_t exec(command_queue &queue) | |
74 | { | |
75 | const device &device = queue.get_device(); | |
76 | const context &context = queue.get_context(); | |
77 | ||
78 | size_t threads = device.compute_units(); | |
79 | ||
80 | const size_t minimum_block_size = 2048; | |
81 | if(m_size / threads < minimum_block_size){ | |
82 | threads = static_cast<size_t>( | |
83 | (std::max)( | |
84 | std::ceil(float(m_size) / minimum_block_size), | |
85 | 1.0f | |
86 | ) | |
87 | ); | |
88 | } | |
89 | ||
90 | // storage for counts | |
91 | ::boost::compute::vector<ulong_> counts(threads, context); | |
92 | ||
93 | // exec kernel | |
94 | set_arg(m_size_arg, static_cast<ulong_>(m_size)); | |
95 | set_arg(m_counts_arg, counts.get_buffer()); | |
96 | exec_1d(queue, 0, threads, 1); | |
97 | ||
98 | // copy counts to the host | |
99 | std::vector<ulong_> host_counts(threads); | |
100 | ::boost::compute::copy(counts.begin(), counts.end(), host_counts.begin(), queue); | |
101 | ||
102 | // return sum of counts | |
103 | return std::accumulate(host_counts.begin(), host_counts.end(), size_t(0)); | |
104 | } | |
105 | ||
106 | private: | |
107 | size_t m_size; | |
108 | size_t m_size_arg; | |
109 | size_t m_counts_arg; | |
110 | }; | |
111 | ||
112 | // counts values that match the predicate using one thread per block. this is | |
113 | // optimized for cpu-type devices with a small number of compute units. | |
114 | template<class InputIterator, class Predicate> | |
115 | inline size_t count_if_with_threads(InputIterator first, | |
116 | InputIterator last, | |
117 | Predicate predicate, | |
118 | command_queue &queue) | |
119 | { | |
120 | count_if_with_threads_kernel<InputIterator, Predicate> kernel; | |
121 | kernel.set_args(first, last, predicate); | |
122 | return kernel.exec(queue); | |
123 | } | |
124 | ||
125 | } // end detail namespace | |
126 | } // end compute namespace | |
127 | } // end boost namespace | |
128 | ||
129 | #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_THREADS_HPP |