[ceph.git] / ceph / src / boost / libs / compute / include / boost / compute / algorithm / detail / count_if_with_threads.hpp

//---------------------------------------------------------------------------//
// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_THREADS_HPP
#define BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_THREADS_HPP

#include <numeric>

#include <boost/compute/detail/meta_kernel.hpp>
#include <boost/compute/container/vector.hpp>

namespace boost {
namespace compute {
namespace detail {

// Meta-kernel that counts the elements of a range satisfying a predicate.
//
// The input range is split into one contiguous block per work-item; each
// work-item counts the matches in its block sequentially and stores its
// partial count in a "counts" buffer. exec() sums the partial counts on the
// host.
//
// Usage: call set_args() once to generate the kernel source, then exec().
// set_args() appends to the kernel source and registers kernel arguments on
// every call, so it is not meant to be invoked repeatedly on one instance.
template<class InputIterator, class Predicate>
class count_if_with_threads_kernel : meta_kernel
{
public:
    typedef typename
        std::iterator_traits<InputIterator>::value_type
        value_type;

    count_if_with_threads_kernel()
        : meta_kernel("count_if_with_threads"),
          m_size(0),
          m_size_arg(0),
          m_counts_arg(0)
    {
    }

    // Records the size of [first, last) and generates the kernel source
    // that applies `predicate` to every element of the range.
    void set_args(InputIterator first,
                  InputIterator last,
                  Predicate predicate)

    {
        m_size = detail::iterator_range_size(first, last);

        m_size_arg = add_arg<const ulong_>("size");
        m_counts_arg = add_arg<ulong_ *>(memory_object::global_memory, "counts");

        // all index arithmetic is done in 64-bit (ulong) so that it matches
        // the type of the "size" argument and the "counts" buffer; 32-bit
        // uint variables would truncate ranges larger than 2^32 elements.
        *this <<
            // thread parameters
            "const ulong gid = get_global_id(0);\n" <<
            "const ulong block_size = size / get_global_size(0);\n" <<
            "const ulong start = block_size * gid;\n" <<
            "ulong end = 0;\n" <<
            // the last work-item also handles the remainder of the division
            "if(gid == get_global_size(0) - 1)\n" <<
            "    end = size;\n" <<
            "else\n" <<
            "    end = block_size * gid + block_size;\n" <<

            // count values
            "ulong count = 0;\n" <<
            "for(ulong i = start; i < end; i++){\n" <<
                decl<const value_type>("value") << "="
                    << first[expr<ulong_>("i")] << ";\n" <<
                if_(predicate(var<const value_type>("value"))) << "{\n" <<
                    "count++;\n" <<
                "}\n" <<
            "}\n" <<

            // write count
            "counts[gid] = count;\n";
    }

    // Runs the kernel on `queue` and returns the total number of matching
    // elements (the sum of the per-work-item counts).
    size_t exec(command_queue &queue)
    {
        const device &device = queue.get_device();
        const context &context = queue.get_context();

        // start with one work-item per compute unit; guard against a device
        // reporting zero compute units, which would otherwise cause a
        // division by zero below and an empty launch.
        size_t threads = device.compute_units();
        if(threads == 0){
            threads = 1;
        }

        // if the blocks would be smaller than the minimum block size, use
        // fewer work-items: ceil(m_size / minimum_block_size), but at
        // least one. integer arithmetic keeps this exact for any size.
        const size_t minimum_block_size = 2048;
        if(m_size / threads < minimum_block_size){
            threads = m_size / minimum_block_size;
            if(m_size % minimum_block_size != 0){
                ++threads;
            }
            if(threads == 0){
                threads = 1;
            }
        }

        // storage for counts
        ::boost::compute::vector<ulong_> counts(threads, context);

        // exec kernel (global size = threads, local size = 1)
        set_arg(m_size_arg, static_cast<ulong_>(m_size));
        set_arg(m_counts_arg, counts.get_buffer());
        exec_1d(queue, 0, threads, 1);

        // copy counts to the host
        std::vector<ulong_> host_counts(threads);
        ::boost::compute::copy(counts.begin(), counts.end(), host_counts.begin(), queue);

        // return sum of counts
        return std::accumulate(host_counts.begin(), host_counts.end(), size_t(0));
    }

private:
    size_t m_size;       // number of elements in the input range
    size_t m_size_arg;   // index of the "size" kernel argument
    size_t m_counts_arg; // index of the "counts" kernel argument
};

// counts values that match the predicate using one thread per block. this is
// optimized for cpu-type devices with a small number of compute units.
template<class InputIterator, class Predicate>
inline size_t count_if_with_threads(InputIterator first,
                                    InputIterator last,
                                    Predicate predicate,
                                    command_queue &queue)
{
    count_if_with_threads_kernel<InputIterator, Predicate> kernel;
    kernel.set_args(first, last, predicate);
    return kernel.exec(queue);
}

} // end detail namespace
} // end compute namespace
} // end boost namespace

#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_THREADS_HPP
Commit	Line	Data
7c673cae FG	1	//---------------------------------------------------------------------------//
	2	// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
	3	//
	4	// Distributed under the Boost Software License, Version 1.0
	5	// See accompanying file LICENSE_1_0.txt or copy at
	6	// http://www.boost.org/LICENSE_1_0.txt
	7	//
	8	// See http://boostorg.github.com/compute for more information.
	9	//---------------------------------------------------------------------------//
	10
	11	#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_THREADS_HPP
	12	#define BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_THREADS_HPP
	13
	14	#include <numeric>
	15
	16	#include <boost/compute/detail/meta_kernel.hpp>
	17	#include <boost/compute/container/vector.hpp>
	18
	19	namespace boost {
	20	namespace compute {
	21	namespace detail {
	22
	23	template<class InputIterator, class Predicate>
	24	class count_if_with_threads_kernel : meta_kernel
	25	{
	26	public:
	27	typedef typename
	28	std::iterator_traits<InputIterator>::value_type
	29	value_type;
	30
	31	count_if_with_threads_kernel()
	32	: meta_kernel("count_if_with_threads")
	33	{
	34	}
	35
	36	void set_args(InputIterator first,
	37	InputIterator last,
	38	Predicate predicate)
	39
	40	{
	41	typedef typename std::iterator_traits<InputIterator>::value_type T;
	42
	43	m_size = detail::iterator_range_size(first, last);
	44
	45	m_size_arg = add_arg<const ulong_>("size");
	46	m_counts_arg = add_arg<ulong_ *>(memory_object::global_memory, "counts");
	47
	48	*this <<
	49	// thread parameters
	50	"const uint gid = get_global_id(0);\n" <<
	51	"const uint block_size = size / get_global_size(0);\n" <<
	52	"const uint start = block_size * gid;\n" <<
	53	"uint end = 0;\n" <<
	54	"if(gid == get_global_size(0) - 1)\n" <<
	55	" end = size;\n" <<
	56	"else\n" <<
	57	" end = block_size * gid + block_size;\n" <<
	58
	59	// count values
	60	"uint count = 0;\n" <<
	61	"for(uint i = start; i < end; i++){\n" <<
	62	decl<const T>("value") << "="
	63	<< first[expr<uint_>("i")] << ";\n" <<
	64	if_(predicate(var<const T>("value"))) << "{\n" <<
65	"count++;\n" <<
66	"}\n" <<
67	"}\n" <<
68
69	// write count
70	"counts[gid] = count;\n";
71	}
72
73	size_t exec(command_queue &queue)
74	{
75	const device &device = queue.get_device();
76	const context &context = queue.get_context();
77
78	size_t threads = device.compute_units();
79
80	const size_t minimum_block_size = 2048;
81	if(m_size / threads < minimum_block_size){
82	threads = static_cast<size_t>(
83	(std::max)(
84	std::ceil(float(m_size) / minimum_block_size),
85	1.0f
86	)
87	);
88	}
89
90	// storage for counts
91	::boost::compute::vector<ulong_> counts(threads, context);
92
93	// exec kernel
94	set_arg(m_size_arg, static_cast<ulong_>(m_size));
95	set_arg(m_counts_arg, counts.get_buffer());
96	exec_1d(queue, 0, threads, 1);
97
98	// copy counts to the host
99	std::vector<ulong_> host_counts(threads);
100	::boost::compute::copy(counts.begin(), counts.end(), host_counts.begin(), queue);
101
102	// return sum of counts
103	return std::accumulate(host_counts.begin(), host_counts.end(), size_t(0));
104	}
105
106	private:
107	size_t m_size;
108	size_t m_size_arg;
109	size_t m_counts_arg;
110	};
111
112	// counts values that match the predicate using one thread per block. this is
113	// optimized for cpu-type devices with a small number of compute units.
114	template<class InputIterator, class Predicate>
115	inline size_t count_if_with_threads(InputIterator first,
116	InputIterator last,
117	Predicate predicate,
118	command_queue &queue)
119	{
120	count_if_with_threads_kernel<InputIterator, Predicate> kernel;
121	kernel.set_args(first, last, predicate);
122	return kernel.exec(queue);
123	}
124
125	} // end detail namespace
126	} // end compute namespace
127	} // end boost namespace
128
129	#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_THREADS_HPP