//---------------------------------------------------------------------------//
// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//

#define BOOST_TEST_MODULE TestCommandQueue
#include <boost/test/unit_test.hpp>

#include <iostream>

#include <boost/compute/kernel.hpp>
#include <boost/compute/system.hpp>
#include <boost/compute/program.hpp>
#include <boost/compute/command_queue.hpp>
#include <boost/compute/algorithm/fill.hpp>
#include <boost/compute/container/vector.hpp>
#include <boost/compute/utility/dim.hpp>
#include <boost/compute/utility/source.hpp>
#include <boost/compute/detail/diagnostic.hpp>

#include "check_macros.hpp"
#include "context_setup.hpp"

namespace bc = boost::compute;
namespace compute = boost::compute;

BOOST_AUTO_TEST_CASE(get_context)
{
    BOOST_VERIFY(queue.get_context() == context);
    BOOST_VERIFY(queue.get_info<CL_QUEUE_CONTEXT>() == context.get());
}

BOOST_AUTO_TEST_CASE(get_device)
{
    BOOST_VERIFY(queue.get_info<CL_QUEUE_DEVICE>() == device.get());
}

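// (Added note) Copying a command_queue copies the underlying
// cl_command_queue handle (the wrapper is reference counted), so the copy
// compares equal, while a separately constructed queue wraps a different
// handle and does not.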
BOOST_AUTO_TEST_CASE(equality_operator)
{
    compute::command_queue queue1(context, device);
    BOOST_CHECK(queue1 == queue1);

    compute::command_queue queue2 = queue1;
    BOOST_CHECK(queue1 == queue2);

    compute::command_queue queue3(context, device);
    BOOST_CHECK(queue1 != queue3);
}

BOOST_AUTO_TEST_CASE(event_profiling)
{
    bc::command_queue queue(context, device, bc::command_queue::enable_profiling);

    int data[] = { 1, 2, 3, 4, 5, 6, 7, 8 };
    bc::buffer buffer(context, sizeof(data));

    bc::event event =
        queue.enqueue_write_buffer_async(buffer,
                                         0,
                                         sizeof(data),
                                         static_cast<const void *>(data));
    queue.finish();

    event.get_profiling_info<cl_ulong>(bc::event::profiling_command_queued);
    event.get_profiling_info<cl_ulong>(bc::event::profiling_command_submit);
    event.get_profiling_info<cl_ulong>(bc::event::profiling_command_start);
    event.get_profiling_info<cl_ulong>(bc::event::profiling_command_end);
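
    // (Added sketch, not part of the original test) The profiling counters
    // are nanosecond timestamps, so the transfer's device-side execution
    // time is simply end - start. Only the ordering is checked here since
    // absolute values are device-dependent.
    const cl_ulong start_ns =
        event.get_profiling_info<cl_ulong>(bc::event::profiling_command_start);
    const cl_ulong end_ns =
        event.get_profiling_info<cl_ulong>(bc::event::profiling_command_end);
    BOOST_CHECK(end_ns >= start_ns);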
}

BOOST_AUTO_TEST_CASE(kernel_profiling)
{
    // create queue with profiling enabled
    boost::compute::command_queue queue(
        context, device, boost::compute::command_queue::enable_profiling
    );

    // input data
    int data[] = { 1, 2, 3, 4, 5, 6, 7, 8 };
    boost::compute::buffer buffer(context, sizeof(data));

    // copy input data to device
    queue.enqueue_write_buffer(buffer, 0, sizeof(data), data);

    // setup kernel
    const char source[] =
        "__kernel void iscal(__global int *buffer, int alpha)\n"
        "{\n"
        "    buffer[get_global_id(0)] *= alpha;\n"
        "}\n";

    boost::compute::program program =
        boost::compute::program::create_with_source(source, context);
    program.build();

    boost::compute::kernel kernel(program, "iscal");
    kernel.set_arg(0, buffer);
    kernel.set_arg(1, 2);

    // execute kernel
    size_t global_work_offset = 0;
    size_t global_work_size = 8;

    boost::compute::event event =
        queue.enqueue_nd_range_kernel(kernel,
                                      size_t(1),
                                      &global_work_offset,
                                      &global_work_size,
                                      0);
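    // (Added note) For a one-dimensional launch like this one, Boost.Compute
    // also offers a convenience overload, roughly:
    //   event = queue.enqueue_1d_range_kernel(kernel, 0, 8, 0);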

    // wait until kernel is finished
    event.wait();

    // check profiling information
    event.get_profiling_info<cl_ulong>(bc::event::profiling_command_queued);
    event.get_profiling_info<cl_ulong>(bc::event::profiling_command_submit);
    event.get_profiling_info<cl_ulong>(bc::event::profiling_command_start);
    event.get_profiling_info<cl_ulong>(bc::event::profiling_command_end);

    // read results back to host
    queue.enqueue_read_buffer(buffer, 0, sizeof(data), data);

    // check results
    BOOST_CHECK_EQUAL(data[0], 2);
    BOOST_CHECK_EQUAL(data[1], 4);
    BOOST_CHECK_EQUAL(data[2], 6);
    BOOST_CHECK_EQUAL(data[3], 8);
    BOOST_CHECK_EQUAL(data[4], 10);
    BOOST_CHECK_EQUAL(data[5], 12);
    BOOST_CHECK_EQUAL(data[6], 14);
    BOOST_CHECK_EQUAL(data[7], 16);
}

BOOST_AUTO_TEST_CASE(construct_from_cl_command_queue)
{
    // create cl_command_queue
    cl_command_queue cl_queue;
#ifdef CL_VERSION_2_0
    if (device.check_version(2, 0)){ // runtime check
        cl_queue =
            clCreateCommandQueueWithProperties(context, device.id(), 0, 0);
    } else
#endif // CL_VERSION_2_0
    {
        // Suppress deprecated declarations warning
        BOOST_COMPUTE_DISABLE_DEPRECATED_DECLARATIONS();
        cl_queue =
            clCreateCommandQueue(context, device.id(), 0, 0);
        BOOST_COMPUTE_ENABLE_DEPRECATED_DECLARATIONS();
    }
    BOOST_VERIFY(cl_queue);

    // create boost::compute::command_queue
    boost::compute::command_queue queue(cl_queue);
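    // (Added note) The wrapper takes its own reference to the queue (it
    // retains it by default), so the reference created above still has to be
    // released explicitly at the end of this test.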

    // check queue
    BOOST_CHECK(queue.get_context() == context);
    BOOST_CHECK(cl_command_queue(queue) == cl_queue);

    // cleanup cl_command_queue
    clReleaseCommandQueue(cl_queue);
}

#ifdef CL_VERSION_1_1
BOOST_AUTO_TEST_CASE(write_buffer_rect)
{
    REQUIRES_OPENCL_VERSION(1, 1);

    // skip this test on AMD GPUs due to a buggy implementation
    // of the clEnqueueWriteBufferRect() function
    if(device.vendor() == "Advanced Micro Devices, Inc." &&
       device.type() & boost::compute::device::gpu){
        std::cerr << "skipping write_buffer_rect test on AMD GPU" << std::endl;
        return;
    }

    int data[] = { 1, 2, 3, 4, 5, 6, 7, 8 };
    boost::compute::buffer buffer(context, 8 * sizeof(int));

    // copy every other value to the buffer
    size_t buffer_origin[] = { 0, 0, 0 };
    size_t host_origin[] = { 0, 0, 0 };
    size_t region[] = { sizeof(int), sizeof(int), 1 };

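    // (Added explanation) The region spans sizeof(int) rows of sizeof(int)
    // bytes each. With a host row pitch of 2 * sizeof(int), the source rows
    // start at data[0], data[2], data[4] and data[6], while the buffer row
    // pitch of sizeof(int) packs them contiguously, so the buffer receives
    // the values 1, 3, 5, 7.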
    queue.enqueue_write_buffer_rect(
        buffer,
        buffer_origin,
        host_origin,
        region,
        sizeof(int),
        0,
        2 * sizeof(int),
        0,
        data
    );

    // check output values
    int output[4];
    queue.enqueue_read_buffer(buffer, 0, 4 * sizeof(int), output);
    BOOST_CHECK_EQUAL(output[0], 1);
    BOOST_CHECK_EQUAL(output[1], 3);
    BOOST_CHECK_EQUAL(output[2], 5);
    BOOST_CHECK_EQUAL(output[3], 7);
}
#endif // CL_VERSION_1_1

static bool nullary_kernel_executed = false;

static void nullary_kernel()
{
    nullary_kernel_executed = true;
}

BOOST_AUTO_TEST_CASE(native_kernel)
{
    cl_device_exec_capabilities exec_capabilities =
        device.get_info<CL_DEVICE_EXECUTION_CAPABILITIES>();
    if(!(exec_capabilities & CL_EXEC_NATIVE_KERNEL)){
        std::cerr << "skipping native_kernel test: "
                  << "device does not support CL_EXEC_NATIVE_KERNEL"
                  << std::endl;
        return;
    }

    compute::vector<int> vector(1000, context);
    compute::fill(vector.begin(), vector.end(), 42, queue);
    BOOST_CHECK_EQUAL(nullary_kernel_executed, false);
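    // (Added note) A native kernel is a plain host function executed by the
    // OpenCL runtime; finish() below guarantees it has run before the flag
    // is checked.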
    queue.enqueue_native_kernel(&nullary_kernel);
    queue.finish();
    BOOST_CHECK_EQUAL(nullary_kernel_executed, true);
}

BOOST_AUTO_TEST_CASE(copy_with_wait_list)
{
    int data1[] = { 1, 3, 5, 7 };
    int data2[] = { 2, 4, 6, 8 };

    compute::buffer buf1(context, 4 * sizeof(int));
    compute::buffer buf2(context, 4 * sizeof(int));

    compute::event write_event1 =
        queue.enqueue_write_buffer_async(buf1, 0, buf1.size(), data1);

    compute::event write_event2 =
        queue.enqueue_write_buffer_async(buf2, 0, buf2.size(), data2);

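    // (Added note) Passing each write event as the trailing wait-list
    // argument makes the corresponding read depend on it explicitly, so the
    // swapped contents are guaranteed to be in place once the read events
    // complete.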
    compute::event read_event1 =
        queue.enqueue_read_buffer_async(buf1, 0, buf1.size(), data2, write_event1);

    compute::event read_event2 =
        queue.enqueue_read_buffer_async(buf2, 0, buf2.size(), data1, write_event2);

    read_event1.wait();
    read_event2.wait();

    CHECK_HOST_RANGE_EQUAL(int, 4, data1, (2, 4, 6, 8));
    CHECK_HOST_RANGE_EQUAL(int, 4, data2, (1, 3, 5, 7));
}

BOOST_AUTO_TEST_CASE(enqueue_kernel_with_extents)
{
    using boost::compute::dim;
    using boost::compute::uint_;

    const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE(
        __kernel void foo(__global int *output1, __global int *output2)
        {
            output1[get_global_id(0)] = get_local_id(0);
            output2[get_global_id(1)] = get_local_id(1);
        }
    );

    compute::kernel kernel =
        compute::kernel::create_with_source(source, "foo", context);

    compute::vector<uint_> output1(4, context);
    compute::vector<uint_> output2(4, context);

    kernel.set_arg(0, output1);
    kernel.set_arg(1, output2);

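    // (Added note) dim(...) builds an extents<2>, so this launches a 4x4
    // global range with 1x1 work-groups; every work-item is the only member
    // of its group and both of its local ids are 0.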
    queue.enqueue_nd_range_kernel(kernel, dim(0, 0), dim(4, 4), dim(1, 1));

    CHECK_RANGE_EQUAL(int, 4, output1, (0, 0, 0, 0));
    CHECK_RANGE_EQUAL(int, 4, output2, (0, 0, 0, 0));

    // Maximum number of work-items that can be specified in each
    // dimension of the work-group to clEnqueueNDRangeKernel.
    std::vector<size_t> max_work_item_sizes =
        device.get_info<CL_DEVICE_MAX_WORK_ITEM_SIZES>();

    if(max_work_item_sizes[0] < size_t(2)) {
        return;
    }

    queue.enqueue_nd_range_kernel(kernel, dim(0, 0), dim(4, 4), dim(2, 1));

    CHECK_RANGE_EQUAL(int, 4, output1, (0, 1, 0, 1));
    CHECK_RANGE_EQUAL(int, 4, output2, (0, 0, 0, 0));

    if(max_work_item_sizes[1] < size_t(2)) {
        return;
    }

    queue.enqueue_nd_range_kernel(kernel, dim(0, 0), dim(4, 4), dim(2, 2));

    CHECK_RANGE_EQUAL(int, 4, output1, (0, 1, 0, 1));
    CHECK_RANGE_EQUAL(int, 4, output2, (0, 1, 0, 1));
}

BOOST_AUTO_TEST_SUITE_END()