[ceph.git] / ceph / src / boost / libs / compute / include / boost / compute / detail / work_size.hpp

//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_DETAIL_WORK_SIZE_HPP
#define BOOST_COMPUTE_DETAIL_WORK_SIZE_HPP

#include <cmath>

namespace boost {
namespace compute {
namespace detail {

// Computes the global work size to be passed to clEnqueueNDRangeKernel()
// for a 1D algorithm.
//
//   count - total number of values to process
//   vpt   - number of values to process per thread (must be non-zero)
//   tpb   - number of threads to execute per block (must be non-zero)
//
// Returns the number of threads needed to cover all values,
// i.e. ceil(count / vpt), rounded up to the next multiple of tpb.
//
// The computation is done entirely in integer arithmetic. Converting count
// to float would round it to a 24-bit mantissa, which for large counts can
// produce a work size too small to cover every value.
inline size_t calculate_work_size(size_t count, size_t vpt, size_t tpb)
{
    // Exact ceiling division. Written as quotient + "has remainder" rather
    // than (count + vpt - 1) / vpt so it cannot overflow for huge counts.
    size_t work_size = count / vpt + (count % vpt != 0 ? 1 : 0);

    // Pad up to a whole number of blocks.
    const size_t remainder = work_size % tpb;
    if(remainder != 0){
        work_size += tpb - remainder;
    }
    return work_size;
}

} // end detail namespace
} // end compute namespace
} // end boost namespace

#endif // BOOST_COMPUTE_DETAIL_WORK_SIZE_HPP
Commit	Line	Data
7c673cae FG	1	//---------------------------------------------------------------------------//
	2	// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
	3	//
	4	// Distributed under the Boost Software License, Version 1.0
	5	// See accompanying file LICENSE_1_0.txt or copy at
	6	// http://www.boost.org/LICENSE_1_0.txt
	7	//
	8	// See http://boostorg.github.com/compute for more information.
	9	//---------------------------------------------------------------------------//
	10
	11	#ifndef BOOST_COMPUTE_DETAIL_WORK_SIZE_HPP
	12	#define BOOST_COMPUTE_DETAIL_WORK_SIZE_HPP
	13
	14	#include <cmath>
	15
	16	namespace boost {
	17	namespace compute {
	18	namespace detail {
	19
	20	// Given a total number of values (count), a number of values to
	21	// process per thread (vpt), and a number of threads to execute per
	22	// block (tpb), this function returns the global work size to be
	23	// passed to clEnqueueNDRangeKernel() for a 1D algorithm.
	24	inline size_t calculate_work_size(size_t count, size_t vpt, size_t tpb)
	25	{
	26	size_t work_size = static_cast<size_t>(std::ceil(float(count) / vpt));
	27	if(work_size % tpb != 0){
	28	work_size += tpb - work_size % tpb;
	29	}
	30	return work_size;
	31	}
	32
	33	} // end detail namespace
	34	} // end compute namespace
	35	} // end boost namespace
	36
	37	#endif // BOOST_COMPUTE_DETAIL_WORK_SIZE_HPP