]> git.proxmox.com Git - ceph.git/blame - ceph/src/boost/boost/compute/algorithm/copy.hpp
import new upstream nautilus stable release 14.2.8
[ceph.git] / ceph / src / boost / boost / compute / algorithm / copy.hpp
CommitLineData
7c673cae
FG
1//---------------------------------------------------------------------------//
2// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
3//
4// Distributed under the Boost Software License, Version 1.0
5// See accompanying file LICENSE_1_0.txt or copy at
6// http://www.boost.org/LICENSE_1_0.txt
7//
8// See http://boostorg.github.com/compute for more information.
9//---------------------------------------------------------------------------//
10
11#ifndef BOOST_COMPUTE_ALGORITHM_COPY_HPP
12#define BOOST_COMPUTE_ALGORITHM_COPY_HPP
13
14#include <algorithm>
15#include <iterator>
16
17#include <boost/utility/enable_if.hpp>
18
19#include <boost/mpl/and.hpp>
20#include <boost/mpl/not.hpp>
21#include <boost/mpl/or.hpp>
22
23#include <boost/compute/buffer.hpp>
24#include <boost/compute/system.hpp>
25#include <boost/compute/command_queue.hpp>
26#include <boost/compute/algorithm/detail/copy_on_device.hpp>
27#include <boost/compute/algorithm/detail/copy_to_device.hpp>
28#include <boost/compute/algorithm/detail/copy_to_host.hpp>
29#include <boost/compute/async/future.hpp>
30#include <boost/compute/container/mapped_view.hpp>
31#include <boost/compute/detail/device_ptr.hpp>
32#include <boost/compute/detail/is_contiguous_iterator.hpp>
33#include <boost/compute/detail/iterator_range_size.hpp>
34#include <boost/compute/detail/parameter_cache.hpp>
35#include <boost/compute/iterator/buffer_iterator.hpp>
36#include <boost/compute/type_traits/type_name.hpp>
37#include <boost/compute/type_traits/is_device_iterator.hpp>
38
39namespace boost {
40namespace compute {
41namespace detail {
42
43namespace mpl = boost::mpl;
44
45// meta-function returning true if copy() between InputIterator and
46// OutputIterator can be implemented with clEnqueueCopyBuffer().
47template<class InputIterator, class OutputIterator>
48struct can_copy_with_copy_buffer :
49 mpl::and_<
50 mpl::or_<
51 boost::is_same<
52 InputIterator,
53 buffer_iterator<typename InputIterator::value_type>
54 >,
55 boost::is_same<
56 InputIterator,
57 detail::device_ptr<typename InputIterator::value_type>
58 >
59 >,
60 mpl::or_<
61 boost::is_same<
62 OutputIterator,
63 buffer_iterator<typename OutputIterator::value_type>
64 >,
65 boost::is_same<
66 OutputIterator,
67 detail::device_ptr<typename OutputIterator::value_type>
68 >
69 >,
70 boost::is_same<
71 typename InputIterator::value_type,
72 typename OutputIterator::value_type
73 >
74 >::type {};
75
76// meta-function returning true if value_types of HostIterator and
77// DeviceIterator are same
78template<class HostIterator, class DeviceIterator>
79struct is_same_value_type :
80 boost::is_same<
81 typename boost::remove_cv<
82 typename std::iterator_traits<HostIterator>::value_type
83 >::type,
84 typename boost::remove_cv<
85 typename DeviceIterator::value_type
86 >::type
87 >::type {};
88
89// meta-function returning true if value_type of HostIterator is bool
90template<class HostIterator>
91struct is_bool_value_type :
92 boost::is_same<
93 typename boost::remove_cv<
94 typename std::iterator_traits<HostIterator>::value_type
95 >::type,
96 bool
97 >::type {};
98
99// host -> device (async)
100template<class InputIterator, class OutputIterator>
101inline future<OutputIterator>
102dispatch_copy_async(InputIterator first,
103 InputIterator last,
104 OutputIterator result,
105 command_queue &queue,
92f5a8d4 106 const wait_list &events,
7c673cae
FG
107 typename boost::enable_if<
108 mpl::and_<
109 mpl::not_<
110 is_device_iterator<InputIterator>
111 >,
112 is_device_iterator<OutputIterator>,
113 is_same_value_type<InputIterator, OutputIterator>
114 >
115 >::type* = 0)
116{
117 BOOST_STATIC_ASSERT_MSG(
118 is_contiguous_iterator<InputIterator>::value,
119 "copy_async() is only supported for contiguous host iterators"
120 );
121
92f5a8d4 122 return copy_to_device_async(first, last, result, queue, events);
7c673cae
FG
123}
124
125// host -> device (async)
126// Type mismatch between InputIterator and OutputIterator value_types
127template<class InputIterator, class OutputIterator>
128inline future<OutputIterator>
129dispatch_copy_async(InputIterator first,
130 InputIterator last,
131 OutputIterator result,
132 command_queue &queue,
92f5a8d4 133 const wait_list &events,
7c673cae
FG
134 typename boost::enable_if<
135 mpl::and_<
136 mpl::not_<
137 is_device_iterator<InputIterator>
138 >,
139 is_device_iterator<OutputIterator>,
140 mpl::not_<
141 is_same_value_type<InputIterator, OutputIterator>
142 >
143 >
144 >::type* = 0)
145{
146 BOOST_STATIC_ASSERT_MSG(
147 is_contiguous_iterator<InputIterator>::value,
148 "copy_async() is only supported for contiguous host iterators"
149 );
150
151 typedef typename std::iterator_traits<InputIterator>::value_type input_type;
152
153 const context &context = queue.get_context();
154 size_t count = iterator_range_size(first, last);
155
156 if(count < size_t(1)) {
157 return future<OutputIterator>();
158 }
159
160 // map [first; last) to device and run copy kernel
161 // on device for copying & casting
162 ::boost::compute::mapped_view<input_type> mapped_host(
163 // make sure it's a pointer to constant data
164 // to force read only mapping
165 const_cast<const input_type*>(
166 ::boost::addressof(*first)
167 ),
168 count,
169 context
170 );
171 return copy_on_device_async(
92f5a8d4 172 mapped_host.begin(), mapped_host.end(), result, queue, events
7c673cae
FG
173 );
174}
175
176// host -> device
177// InputIterator is a contiguous iterator
178template<class InputIterator, class OutputIterator>
179inline OutputIterator
180dispatch_copy(InputIterator first,
181 InputIterator last,
182 OutputIterator result,
183 command_queue &queue,
92f5a8d4 184 const wait_list &events,
7c673cae
FG
185 typename boost::enable_if<
186 mpl::and_<
187 mpl::not_<
188 is_device_iterator<InputIterator>
189 >,
190 is_device_iterator<OutputIterator>,
191 is_same_value_type<InputIterator, OutputIterator>,
192 is_contiguous_iterator<InputIterator>
193 >
194 >::type* = 0)
195{
92f5a8d4 196 return copy_to_device(first, last, result, queue, events);
7c673cae
FG
197}
198
199// host -> device
200// Type mismatch between InputIterator and OutputIterator value_types
201// InputIterator is a contiguous iterator
202template<class InputIterator, class OutputIterator>
203inline OutputIterator
204dispatch_copy(InputIterator first,
205 InputIterator last,
206 OutputIterator result,
207 command_queue &queue,
92f5a8d4 208 const wait_list &events,
7c673cae
FG
209 typename boost::enable_if<
210 mpl::and_<
211 mpl::not_<
212 is_device_iterator<InputIterator>
213 >,
214 is_device_iterator<OutputIterator>,
215 mpl::not_<
216 is_same_value_type<InputIterator, OutputIterator>
217 >,
218 is_contiguous_iterator<InputIterator>
219 >
220 >::type* = 0)
221{
222 typedef typename OutputIterator::value_type output_type;
223 typedef typename std::iterator_traits<InputIterator>::value_type input_type;
224
225 const device &device = queue.get_device();
226
227 // loading parameters
228 std::string cache_key =
229 std::string("__boost_compute_copy_to_device_")
230 + type_name<input_type>() + "_" + type_name<output_type>();
231 boost::shared_ptr<parameter_cache> parameters =
232 detail::parameter_cache::get_global_cache(device);
233
b32b8144
FG
234 uint_ map_copy_threshold;
235 uint_ direct_copy_threshold;
7c673cae
FG
236
237 // calculate default values of thresholds
238 if (device.type() & device::gpu) {
239 // GPUs
240 map_copy_threshold = 524288; // 0.5 MB
241 direct_copy_threshold = 52428800; // 50 MB
242 }
243 else {
244 // CPUs and other devices
245 map_copy_threshold = 134217728; // 128 MB
246 direct_copy_threshold = 0; // it's never efficient for CPUs
247 }
248
249 // load thresholds
250 map_copy_threshold =
251 parameters->get(
252 cache_key, "map_copy_threshold", map_copy_threshold
253 );
254 direct_copy_threshold =
255 parameters->get(
256 cache_key, "direct_copy_threshold", direct_copy_threshold
257 );
258
259 // select copy method based on thresholds & input_size_bytes
260 size_t count = iterator_range_size(first, last);
261 size_t input_size_bytes = count * sizeof(input_type);
262
263 // [0; map_copy_threshold) -> copy_to_device_map()
264 if(input_size_bytes < map_copy_threshold) {
92f5a8d4 265 return copy_to_device_map(first, last, result, queue, events);
7c673cae
FG
266 }
267 // [map_copy_threshold; direct_copy_threshold) -> convert [first; last)
268 // on host and then perform copy_to_device()
269 else if(input_size_bytes < direct_copy_threshold) {
270 std::vector<output_type> vector(first, last);
92f5a8d4
TL
271 return copy_to_device(
272 vector.begin(), vector.end(), result, queue, events
273 );
7c673cae
FG
274 }
275
276 // [direct_copy_threshold; inf) -> map [first; last) to device and
277 // run copy kernel on device for copying & casting
278 // At this point we are sure that count > 1 (first != last).
279
280 // Perform async copy to device, wait for it to be finished and
281 // return the result.
282 // At this point we are sure that count > 1 (first != last), so event
283 // returned by dispatch_copy_async() must be valid.
92f5a8d4 284 return dispatch_copy_async(first, last, result, queue, events).get();
7c673cae
FG
285}
286
287// host -> device
288// InputIterator is NOT a contiguous iterator
289template<class InputIterator, class OutputIterator>
290inline OutputIterator
291dispatch_copy(InputIterator first,
292 InputIterator last,
293 OutputIterator result,
294 command_queue &queue,
92f5a8d4 295 const wait_list &events,
7c673cae
FG
296 typename boost::enable_if<
297 mpl::and_<
298 mpl::not_<
299 is_device_iterator<InputIterator>
300 >,
301 is_device_iterator<OutputIterator>,
302 mpl::not_<
303 is_contiguous_iterator<InputIterator>
304 >
305 >
306 >::type* = 0)
307{
308 typedef typename OutputIterator::value_type output_type;
309 typedef typename std::iterator_traits<InputIterator>::value_type input_type;
310
311 const device &device = queue.get_device();
312
313 // loading parameters
314 std::string cache_key =
315 std::string("__boost_compute_copy_to_device_")
316 + type_name<input_type>() + "_" + type_name<output_type>();
317 boost::shared_ptr<parameter_cache> parameters =
318 detail::parameter_cache::get_global_cache(device);
319
b32b8144
FG
320 uint_ map_copy_threshold;
321 uint_ direct_copy_threshold;
7c673cae
FG
322
323 // calculate default values of thresholds
324 if (device.type() & device::gpu) {
325 // GPUs
326 map_copy_threshold = 524288; // 0.5 MB
327 direct_copy_threshold = 52428800; // 50 MB
328 }
329 else {
330 // CPUs and other devices
331 map_copy_threshold = 134217728; // 128 MB
332 direct_copy_threshold = 0; // it's never efficient for CPUs
333 }
334
335 // load thresholds
336 map_copy_threshold =
337 parameters->get(
338 cache_key, "map_copy_threshold", map_copy_threshold
339 );
340 direct_copy_threshold =
341 parameters->get(
342 cache_key, "direct_copy_threshold", direct_copy_threshold
343 );
344
345 // select copy method based on thresholds & input_size_bytes
346 size_t input_size = iterator_range_size(first, last);
347 size_t input_size_bytes = input_size * sizeof(input_type);
348
349 // [0; map_copy_threshold) -> copy_to_device_map()
350 //
351 // if direct_copy_threshold is less than map_copy_threshold
352 // copy_to_device_map() is used for every input
353 if(input_size_bytes < map_copy_threshold
354 || direct_copy_threshold <= map_copy_threshold) {
92f5a8d4 355 return copy_to_device_map(first, last, result, queue, events);
7c673cae
FG
356 }
357 // [map_copy_threshold; inf) -> convert [first; last)
358 // on host and then perform copy_to_device()
359 std::vector<output_type> vector(first, last);
92f5a8d4 360 return copy_to_device(vector.begin(), vector.end(), result, queue, events);
7c673cae
FG
361}
362
363// device -> host (async)
364template<class InputIterator, class OutputIterator>
365inline future<OutputIterator>
366dispatch_copy_async(InputIterator first,
367 InputIterator last,
368 OutputIterator result,
369 command_queue &queue,
92f5a8d4 370 const wait_list &events,
7c673cae
FG
371 typename boost::enable_if<
372 mpl::and_<
373 is_device_iterator<InputIterator>,
374 mpl::not_<
375 is_device_iterator<OutputIterator>
376 >,
377 is_same_value_type<OutputIterator, InputIterator>
378 >
379 >::type* = 0)
380{
381 BOOST_STATIC_ASSERT_MSG(
382 is_contiguous_iterator<OutputIterator>::value,
383 "copy_async() is only supported for contiguous host iterators"
384 );
385
92f5a8d4 386 return copy_to_host_async(first, last, result, queue, events);
7c673cae
FG
387}
388
389// device -> host (async)
390// Type mismatch between InputIterator and OutputIterator value_types
391template<class InputIterator, class OutputIterator>
392inline future<OutputIterator>
393dispatch_copy_async(InputIterator first,
394 InputIterator last,
395 OutputIterator result,
396 command_queue &queue,
92f5a8d4 397 const wait_list &events,
7c673cae
FG
398 typename boost::enable_if<
399 mpl::and_<
400 is_device_iterator<InputIterator>,
401 mpl::not_<
402 is_device_iterator<OutputIterator>
403 >,
404 mpl::not_<
405 is_same_value_type<OutputIterator, InputIterator>
406 >
407 >
408 >::type* = 0)
409{
410 BOOST_STATIC_ASSERT_MSG(
411 is_contiguous_iterator<OutputIterator>::value,
412 "copy_async() is only supported for contiguous host iterators"
413 );
414
415 typedef typename std::iterator_traits<OutputIterator>::value_type output_type;
416 const context &context = queue.get_context();
417 size_t count = iterator_range_size(first, last);
418
419 if(count < size_t(1)) {
420 return future<OutputIterator>();
421 }
422
423 // map host memory to device
424 buffer mapped_host(
425 context,
426 count * sizeof(output_type),
427 buffer::write_only | buffer::use_host_ptr,
428 static_cast<void*>(
429 ::boost::addressof(*result)
430 )
431 );
432 // copy async on device
433 ::boost::compute::future<buffer_iterator<output_type> > future =
434 copy_on_device_async(
435 first,
436 last,
437 make_buffer_iterator<output_type>(mapped_host),
92f5a8d4
TL
438 queue,
439 events
7c673cae
FG
440 );
441 // update host memory asynchronously by maping and unmaping memory
442 event map_event;
443 void* ptr = queue.enqueue_map_buffer_async(
444 mapped_host,
445 CL_MAP_READ,
446 0,
447 count * sizeof(output_type),
448 map_event,
449 future.get_event()
450 );
451 event unmap_event =
452 queue.enqueue_unmap_buffer(mapped_host, ptr, map_event);
453 return make_future(result + count, unmap_event);
454}
455
456// device -> host
457// OutputIterator is a contiguous iterator
458template<class InputIterator, class OutputIterator>
459inline OutputIterator
460dispatch_copy(InputIterator first,
461 InputIterator last,
462 OutputIterator result,
463 command_queue &queue,
92f5a8d4 464 const wait_list &events,
7c673cae
FG
465 typename boost::enable_if<
466 mpl::and_<
467 is_device_iterator<InputIterator>,
468 mpl::not_<
469 is_device_iterator<OutputIterator>
470 >,
471 is_same_value_type<OutputIterator, InputIterator>,
472 is_contiguous_iterator<OutputIterator>,
473 mpl::not_<
474 is_bool_value_type<OutputIterator>
475 >
476 >
477 >::type* = 0)
478{
92f5a8d4 479 return copy_to_host(first, last, result, queue, events);
7c673cae
FG
480}
481
482// device -> host
483// Type mismatch between InputIterator and OutputIterator value_types
484// OutputIterator is NOT a contiguous iterator or value_type of OutputIterator
485// is a boolean type.
486template<class InputIterator, class OutputIterator>
487inline OutputIterator
488dispatch_copy(InputIterator first,
489 InputIterator last,
490 OutputIterator result,
491 command_queue &queue,
92f5a8d4 492 const wait_list &events,
7c673cae
FG
493 typename boost::enable_if<
494 mpl::and_<
495 is_device_iterator<InputIterator>,
496 mpl::not_<
497 is_device_iterator<OutputIterator>
498 >,
499 mpl::or_<
500 mpl::not_<
501 is_contiguous_iterator<OutputIterator>
502 >,
503 is_bool_value_type<OutputIterator>
504 >
505 >
506 >::type* = 0)
507{
508 typedef typename std::iterator_traits<OutputIterator>::value_type output_type;
509 typedef typename InputIterator::value_type input_type;
510
511 const device &device = queue.get_device();
512
513 // loading parameters
514 std::string cache_key =
515 std::string("__boost_compute_copy_to_host_")
516 + type_name<input_type>() + "_" + type_name<output_type>();
517 boost::shared_ptr<parameter_cache> parameters =
518 detail::parameter_cache::get_global_cache(device);
519
b32b8144
FG
520 uint_ map_copy_threshold;
521 uint_ direct_copy_threshold;
7c673cae
FG
522
523 // calculate default values of thresholds
524 if (device.type() & device::gpu) {
525 // GPUs
526 map_copy_threshold = 33554432; // 30 MB
527 direct_copy_threshold = 0; // it's never efficient for GPUs
528 }
529 else {
530 // CPUs and other devices
531 map_copy_threshold = 134217728; // 128 MB
532 direct_copy_threshold = 0; // it's never efficient for CPUs
533 }
534
535 // load thresholds
536 map_copy_threshold =
537 parameters->get(
538 cache_key, "map_copy_threshold", map_copy_threshold
539 );
540 direct_copy_threshold =
541 parameters->get(
542 cache_key, "direct_copy_threshold", direct_copy_threshold
543 );
544
545 // select copy method based on thresholds & input_size_bytes
546 size_t count = iterator_range_size(first, last);
547 size_t input_size_bytes = count * sizeof(input_type);
548
549 // [0; map_copy_threshold) -> copy_to_host_map()
550 //
551 // if direct_copy_threshold is less than map_copy_threshold
552 // copy_to_host_map() is used for every input
553 if(input_size_bytes < map_copy_threshold
554 || direct_copy_threshold <= map_copy_threshold) {
92f5a8d4 555 return copy_to_host_map(first, last, result, queue, events);
7c673cae
FG
556 }
557 // [map_copy_threshold; inf) -> copy [first;last) to temporary vector
558 // then copy (and convert) to result using std::copy()
559 std::vector<input_type> vector(count);
92f5a8d4 560 copy_to_host(first, last, vector.begin(), queue, events);
7c673cae
FG
561 return std::copy(vector.begin(), vector.end(), result);
562}
563
564// device -> host
565// Type mismatch between InputIterator and OutputIterator value_types
566// OutputIterator is a contiguous iterator
567// value_type of OutputIterator is NOT a boolean type
568template<class InputIterator, class OutputIterator>
569inline OutputIterator
570dispatch_copy(InputIterator first,
571 InputIterator last,
572 OutputIterator result,
573 command_queue &queue,
92f5a8d4 574 const wait_list &events,
7c673cae
FG
575 typename boost::enable_if<
576 mpl::and_<
577 is_device_iterator<InputIterator>,
578 mpl::not_<
579 is_device_iterator<OutputIterator>
580 >,
581 mpl::not_<
582 is_same_value_type<OutputIterator, InputIterator>
583 >,
584 is_contiguous_iterator<OutputIterator>,
585 mpl::not_<
586 is_bool_value_type<OutputIterator>
587 >
588 >
589 >::type* = 0)
590{
591 typedef typename std::iterator_traits<OutputIterator>::value_type output_type;
592 typedef typename InputIterator::value_type input_type;
593
594 const device &device = queue.get_device();
595
596 // loading parameters
597 std::string cache_key =
598 std::string("__boost_compute_copy_to_host_")
599 + type_name<input_type>() + "_" + type_name<output_type>();
600 boost::shared_ptr<parameter_cache> parameters =
601 detail::parameter_cache::get_global_cache(device);
602
b32b8144
FG
603 uint_ map_copy_threshold;
604 uint_ direct_copy_threshold;
7c673cae
FG
605
606 // calculate default values of thresholds
607 if (device.type() & device::gpu) {
608 // GPUs
609 map_copy_threshold = 524288; // 0.5 MB
610 direct_copy_threshold = 52428800; // 50 MB
611 }
612 else {
613 // CPUs and other devices
614 map_copy_threshold = 134217728; // 128 MB
615 direct_copy_threshold = 0; // it's never efficient for CPUs
616 }
617
618 // load thresholds
619 map_copy_threshold =
620 parameters->get(
621 cache_key, "map_copy_threshold", map_copy_threshold
622 );
623 direct_copy_threshold =
624 parameters->get(
625 cache_key, "direct_copy_threshold", direct_copy_threshold
626 );
627
628 // select copy method based on thresholds & input_size_bytes
629 size_t count = iterator_range_size(first, last);
630 size_t input_size_bytes = count * sizeof(input_type);
631
632 // [0; map_copy_threshold) -> copy_to_host_map()
633 if(input_size_bytes < map_copy_threshold) {
92f5a8d4 634 return copy_to_host_map(first, last, result, queue, events);
7c673cae
FG
635 }
636 // [map_copy_threshold; direct_copy_threshold) -> copy [first;last) to
637 // temporary vector then copy (and convert) to result using std::copy()
638 else if(input_size_bytes < direct_copy_threshold) {
639 std::vector<input_type> vector(count);
92f5a8d4 640 copy_to_host(first, last, vector.begin(), queue, events);
7c673cae
FG
641 return std::copy(vector.begin(), vector.end(), result);
642 }
643
644 // [direct_copy_threshold; inf) -> map [result; result + input_size) to
645 // device and run copy kernel on device for copying & casting
646 // map host memory to device.
647
648 // Perform async copy to host, wait for it to be finished and
649 // return the result.
650 // At this point we are sure that count > 1 (first != last), so event
651 // returned by dispatch_copy_async() must be valid.
92f5a8d4 652 return dispatch_copy_async(first, last, result, queue, events).get();
7c673cae
FG
653}
654
655// device -> device
656template<class InputIterator, class OutputIterator>
657inline OutputIterator
658dispatch_copy(InputIterator first,
659 InputIterator last,
660 OutputIterator result,
661 command_queue &queue,
92f5a8d4 662 const wait_list &events,
7c673cae
FG
663 typename boost::enable_if<
664 mpl::and_<
665 is_device_iterator<InputIterator>,
666 is_device_iterator<OutputIterator>,
667 mpl::not_<
668 can_copy_with_copy_buffer<
669 InputIterator, OutputIterator
670 >
671 >
672 >
673 >::type* = 0)
674{
92f5a8d4 675 return copy_on_device(first, last, result, queue, events);
7c673cae
FG
676}
677
678// device -> device (specialization for buffer iterators)
679template<class InputIterator, class OutputIterator>
680inline OutputIterator
681dispatch_copy(InputIterator first,
682 InputIterator last,
683 OutputIterator result,
684 command_queue &queue,
92f5a8d4 685 const wait_list &events,
7c673cae
FG
686 typename boost::enable_if<
687 mpl::and_<
688 is_device_iterator<InputIterator>,
689 is_device_iterator<OutputIterator>,
690 can_copy_with_copy_buffer<
691 InputIterator, OutputIterator
692 >
693 >
694 >::type* = 0)
695{
696 typedef typename std::iterator_traits<InputIterator>::value_type value_type;
697 typedef typename std::iterator_traits<InputIterator>::difference_type difference_type;
698
699 difference_type n = std::distance(first, last);
700 if(n < 1){
701 // nothing to copy
702 return result;
703 }
704
705 queue.enqueue_copy_buffer(first.get_buffer(),
706 result.get_buffer(),
707 first.get_index() * sizeof(value_type),
708 result.get_index() * sizeof(value_type),
92f5a8d4
TL
709 static_cast<size_t>(n) * sizeof(value_type),
710 events);
7c673cae
FG
711 return result + n;
712}
713
714// device -> device (async)
715template<class InputIterator, class OutputIterator>
716inline future<OutputIterator>
717dispatch_copy_async(InputIterator first,
718 InputIterator last,
719 OutputIterator result,
720 command_queue &queue,
92f5a8d4 721 const wait_list &events,
7c673cae
FG
722 typename boost::enable_if<
723 mpl::and_<
724 is_device_iterator<InputIterator>,
725 is_device_iterator<OutputIterator>,
726 mpl::not_<
727 can_copy_with_copy_buffer<
728 InputIterator, OutputIterator
729 >
730 >
731 >
732 >::type* = 0)
733{
92f5a8d4 734 return copy_on_device_async(first, last, result, queue, events);
7c673cae
FG
735}
736
737// device -> device (async, specialization for buffer iterators)
738template<class InputIterator, class OutputIterator>
739inline future<OutputIterator>
740dispatch_copy_async(InputIterator first,
741 InputIterator last,
742 OutputIterator result,
743 command_queue &queue,
92f5a8d4 744 const wait_list &events,
7c673cae
FG
745 typename boost::enable_if<
746 mpl::and_<
747 is_device_iterator<InputIterator>,
748 is_device_iterator<OutputIterator>,
749 can_copy_with_copy_buffer<
750 InputIterator, OutputIterator
751 >
752 >
753 >::type* = 0)
754{
755 typedef typename std::iterator_traits<InputIterator>::value_type value_type;
756 typedef typename std::iterator_traits<InputIterator>::difference_type difference_type;
757
758 difference_type n = std::distance(first, last);
759 if(n < 1){
760 // nothing to copy
761 return make_future(result, event());
762 }
763
764 event event_ =
765 queue.enqueue_copy_buffer(
766 first.get_buffer(),
767 result.get_buffer(),
768 first.get_index() * sizeof(value_type),
769 result.get_index() * sizeof(value_type),
92f5a8d4
TL
770 static_cast<size_t>(n) * sizeof(value_type),
771 events
7c673cae
FG
772 );
773
774 return make_future(result + n, event_);
775}
776
777// host -> host
778template<class InputIterator, class OutputIterator>
779inline OutputIterator
780dispatch_copy(InputIterator first,
781 InputIterator last,
782 OutputIterator result,
783 command_queue &queue,
92f5a8d4 784 const wait_list &events,
7c673cae
FG
785 typename boost::enable_if_c<
786 !is_device_iterator<InputIterator>::value &&
787 !is_device_iterator<OutputIterator>::value
788 >::type* = 0)
789{
790 (void) queue;
92f5a8d4 791 (void) events;
7c673cae
FG
792
793 return std::copy(first, last, result);
794}
795
796} // end detail namespace
797
798/// Copies the values in the range [\p first, \p last) to the range
799/// beginning at \p result.
800///
801/// The generic copy() function can be used for a variety of data
802/// transfer tasks and provides a standard interface to the following
803/// OpenCL functions:
804///
805/// \li \c clEnqueueReadBuffer()
806/// \li \c clEnqueueWriteBuffer()
807/// \li \c clEnqueueCopyBuffer()
808///
809/// Unlike the aforementioned OpenCL functions, copy() will also work
810/// with non-contiguous data-structures (e.g. \c std::list<T>) as
811/// well as with "fancy" iterators (e.g. transform_iterator).
812///
813/// \param first first element in the range to copy
814/// \param last last element in the range to copy
815/// \param result first element in the result range
816/// \param queue command queue to perform the operation
817///
818/// \return \c OutputIterator to the end of the result range
819///
820/// For example, to copy an array of \c int values on the host to a vector on
821/// the device:
822/// \code
823/// // array on the host
824/// int data[] = { 1, 2, 3, 4 };
825///
826/// // vector on the device
827/// boost::compute::vector<int> vec(4, context);
828///
829/// // copy values to the device vector
830/// boost::compute::copy(data, data + 4, vec.begin(), queue);
831/// \endcode
832///
833/// The copy algorithm can also be used with standard containers such as
834/// \c std::vector<T>:
835/// \code
836/// std::vector<int> host_vector = ...
837/// boost::compute::vector<int> device_vector = ...
838///
839/// // copy from the host to the device
840/// boost::compute::copy(
841/// host_vector.begin(), host_vector.end(), device_vector.begin(), queue
842/// );
843///
844/// // copy from the device to the host
845/// boost::compute::copy(
846/// device_vector.begin(), device_vector.end(), host_vector.begin(), queue
847/// );
848/// \endcode
849///
b32b8144
FG
850/// Space complexity: \Omega(1)
851///
7c673cae
FG
852/// \see copy_n(), copy_if(), copy_async()
853template<class InputIterator, class OutputIterator>
854inline OutputIterator copy(InputIterator first,
855 InputIterator last,
856 OutputIterator result,
92f5a8d4
TL
857 command_queue &queue = system::default_queue(),
858 const wait_list &events = wait_list())
7c673cae 859{
92f5a8d4 860 return detail::dispatch_copy(first, last, result, queue, events);
7c673cae
FG
861}
862
863/// Copies the values in the range [\p first, \p last) to the range
864/// beginning at \p result. The copy is performed asynchronously.
865///
866/// \see copy()
867template<class InputIterator, class OutputIterator>
868inline future<OutputIterator>
869copy_async(InputIterator first,
870 InputIterator last,
871 OutputIterator result,
92f5a8d4
TL
872 command_queue &queue = system::default_queue(),
873 const wait_list &events = wait_list())
7c673cae 874{
92f5a8d4 875 return detail::dispatch_copy_async(first, last, result, queue, events);
7c673cae
FG
876}
877
878} // end compute namespace
879} // end boost namespace
880
881#endif // BOOST_COMPUTE_ALGORITHM_COPY_HPP