// Copyright (C) 2005-2006 Douglas Gregor <doug.gregor -at- gmail.com>.

// Use, modification and distribution is subject to the Boost Software
// License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)

// Message Passing Interface 1.1 -- Section 4. MPI Collectives

/** @file collectives.hpp
 *
 * This header contains MPI collective operations, which implement
 * various parallel algorithms that require the coordination of all
 * processes within a communicator. The header @c collectives_fwd.hpp
 * provides forward declarations for each of these operations. To
 * include only specific collective algorithms, use the headers @c
 * boost/mpi/collectives/algorithm_name.hpp.
 */
#ifndef BOOST_MPI_COLLECTIVES_HPP
#define BOOST_MPI_COLLECTIVES_HPP

#include <boost/mpi/communicator.hpp>
#include <boost/mpi/inplace.hpp>
#include <vector>

namespace boost { namespace mpi {
/**
 * @brief Gather the values stored at every process into vectors of
 * values from each process.
 *
 * @c all_gather is a collective algorithm that collects the values
 * stored at each process into a vector of values indexed by the
 * process number they came from. The type @c T of the values may be
 * any type that is serializable or has an associated MPI data type.
 *
 * When the type @c T has an associated MPI data type, this routine
 * invokes @c MPI_Allgather to gather the values.
 *
 * @param comm The communicator over which the all-gather will
 * occur.
 *
 * @param in_value The value to be transmitted by each process. To
 * gather an array of values, @c in_values points to the @c n local
 * values to be transmitted.
 *
 * @param out_values A vector or pointer to storage that will be
 * populated with the values from each process, indexed by the
 * process ID number. If it is a vector, the vector will be resized
 * accordingly.
 */
template<typename T>
void
all_gather(const communicator& comm, const T& in_value,
           std::vector<T>& out_values);

/**
 * \overload
 */
template<typename T>
void
all_gather(const communicator& comm, const T& in_value, T* out_values);

/**
 * \overload
 */
template<typename T>
void
all_gather(const communicator& comm, const T* in_values, int n,
           std::vector<T>& out_values);

/**
 * \overload
 */
template<typename T>
void
all_gather(const communicator& comm, const T* in_values, int n, T* out_values);
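
// Example usage (a minimal sketch, assuming an initialized
// boost::mpi::environment): gather each rank's number into a vector that
// ends up identical on every process.
//
//   boost::mpi::communicator world;
//   std::vector<int> all_ranks;
//   boost::mpi::all_gather(world, world.rank(), all_ranks);
//   // all_ranks[i] == i on every process.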

/**
 * @brief Combine the values stored by each process into a single
 * value available to all processes.
 *
 * @c all_reduce is a collective algorithm that combines the values
 * stored by each process into a single value available to all
 * processes. The values are combined in a user-defined way,
 * specified via a function object. The type @c T of the values may
 * be any type that is serializable or has an associated MPI data
 * type. One can think of this operation as an @c all_gather, followed
 * by an @c std::accumulate() over the gathered values using the
 * operation @c op.
 *
 * When the type @c T has an associated MPI data type, this routine
 * invokes @c MPI_Allreduce to perform the reduction. If possible,
 * built-in MPI operations will be used; otherwise, @c all_reduce()
 * will create a custom @c MPI_Op for the call to @c MPI_Allreduce.
 *
 * @param comm The communicator over which the reduction will
 * occur.
 * @param value The local value to be combined with the local
 * values of every other process. For reducing arrays, @c in_values
 * is a pointer to the local values to be reduced and @c n is the
 * number of values to reduce. See @c reduce for more information.
 *
 * If wrapped in an @c inplace_t object, the local value serves as
 * both input and @c out_value and will be overwritten with the result
 * (a convenience function @c inplace is provided for the wrapping).
 *
 * @param out_value Will receive the result of the reduction
 * operation. If this parameter is omitted, the outgoing value will
 * instead be returned.
 *
 * @param op The binary operation that combines two values of type
 * @c T and returns a third value of type @c T. For types @c T that have
 * associated MPI data types, @c op will either be translated into
 * an @c MPI_Op (via @c MPI_Op_create) or, if possible, mapped
 * directly to a built-in MPI operation. See @c is_mpi_op in the @c
 * operations.hpp header for more details on this mapping. For any
 * non-built-in operation, commutativity will be determined by the
 * @c is_commutative trait (also in @c operations.hpp): users are
 * encouraged to mark commutative operations as such, because it
 * gives the implementation additional latitude to optimize the
 * reduction operation.
 *
 * @param n Indicates the size of the buffers for the array variants.
 * @returns If no @p out_value parameter is supplied, returns the
 * result of the reduction operation.
 */
template<typename T, typename Op>
void
all_reduce(const communicator& comm, const T* value, int n, T* out_value,
           Op op);
/**
 * \overload
 */
template<typename T, typename Op>
void
all_reduce(const communicator& comm, const T& value, T& out_value, Op op);
/**
 * \overload
 */
template<typename T, typename Op>
T all_reduce(const communicator& comm, const T& value, Op op);

/**
 * \overload
 */
template<typename T, typename Op>
void
all_reduce(const communicator& comm, inplace_t<T*> value, int n,
           Op op);
/**
 * \overload
 */
template<typename T, typename Op>
void
all_reduce(const communicator& comm, inplace_t<T> value, Op op);
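
// Example usage (a minimal sketch, assuming an initialized
// boost::mpi::environment and <functional> for std::plus):
//
//   boost::mpi::communicator world;
//   int sum = boost::mpi::all_reduce(world, world.rank(), std::plus<int>());
//   // Every process now holds 0 + 1 + ... + (world.size() - 1) in sum.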

/**
 * @brief Send data from every process to every other process.
 *
 * @c all_to_all is a collective algorithm that transmits @c p values
 * from every process to every other process. On process i, the jth
 * value of the @p in_values vector is sent to process j and placed in
 * the ith position of the @p out_values vector of process @p j. The
 * type @c T of the values may be any type that is serializable or has
 * an associated MPI data type. If @c n is provided, then arrays of @p n
 * values will be transferred from one process to another.
 *
 * When the type @c T has an associated MPI data type, this routine
 * invokes @c MPI_Alltoall to scatter the values.
 *
 * @param comm The communicator over which the all-to-all
 * communication will occur.
 *
 * @param in_values A vector or pointer to storage that contains
 * the values to send to each process, indexed by the process ID
 * number.
 *
 * @param out_values A vector or pointer to storage that will be
 * updated to contain the values received from other processes. The
 * jth value in @p out_values will come from the process with rank j.
 */
template<typename T>
void
all_to_all(const communicator& comm, const std::vector<T>& in_values,
           std::vector<T>& out_values);

/**
 * \overload
 */
template<typename T>
void all_to_all(const communicator& comm, const T* in_values, T* out_values);

/**
 * \overload
 */
template<typename T>
void
all_to_all(const communicator& comm, const std::vector<T>& in_values, int n,
           std::vector<T>& out_values);

/**
 * \overload
 */
template<typename T>
void
all_to_all(const communicator& comm, const T* in_values, int n, T* out_values);
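
// Example usage (a minimal sketch, assuming an initialized
// boost::mpi::environment): every rank sends its own rank to everyone.
//
//   boost::mpi::communicator world;
//   std::vector<int> in(world.size(), world.rank()), out;
//   boost::mpi::all_to_all(world, in, out);
//   // out[j] holds the value sent by process j, i.e. out[j] == j.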

/**
 * @brief Broadcast a value from a root process to all other
 * processes.
 *
 * @c broadcast is a collective algorithm that transfers a value from
 * an arbitrary @p root process to every other process that is part of
 * the given communicator. The @c broadcast algorithm can transmit any
 * Serializable value, values that have associated MPI data types,
 * packed archives, skeletons, and the content of skeletons; see the
 * @c send primitive for communicators for a complete list. The type
 * @c T shall be the same for all processes that are a part of the
 * communicator @p comm, unless packed archives are being transferred:
 * with packed archives, the root sends a @c packed_oarchive or @c
 * packed_skeleton_oarchive whereas the other processes receive a
 * @c packed_iarchive or @c packed_skeleton_iarchive, respectively.
 *
 * When the type @c T has an associated MPI data type, this routine
 * invokes @c MPI_Bcast to perform the broadcast.
 *
 * @param comm The communicator over which the broadcast will
 * occur.
 *
 * @param value The value (or values, if @p n is provided) to be
 * transmitted (if the rank of @p comm is equal to @p root) or
 * received (if the rank of @p comm is not equal to @p root). When
 * the @p value is a @c skeleton_proxy, only the skeleton of the
 * object will be broadcast. In this case, the @p root will build a
 * skeleton from the object held in the proxy and all of the
 * non-roots will reshape the objects held in their proxies based on
 * the skeleton sent from the root.
 *
 * @param n When supplied, the number of values that the pointer @p
 * values points to, for broadcasting an array of values. The value
 * of @p n must be the same for all processes in @p comm.
 *
 * @param root The rank/process ID of the process that will be
 * transmitting the value.
 */
template<typename T>
void broadcast(const communicator& comm, T& value, int root);

/**
 * \overload
 */
template<typename T>
void broadcast(const communicator& comm, T* values, int n, int root);

/**
 * \overload
 */
template<typename T>
void broadcast(const communicator& comm, skeleton_proxy<T>& value, int root);

/**
 * \overload
 */
template<typename T>
void
broadcast(const communicator& comm, const skeleton_proxy<T>& value, int root);
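
// Example usage (a minimal sketch, assuming an initialized
// boost::mpi::environment and <string>):
//
//   boost::mpi::communicator world;
//   std::string message;
//   if (world.rank() == 0) message = "hello";
//   boost::mpi::broadcast(world, message, 0);
//   // Every process now holds "hello" in message.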

/**
 * @brief Gather the values stored at every process into a vector at
 * the root process.
 *
 * @c gather is a collective algorithm that collects the values
 * stored at each process into a vector of values at the @p root
 * process. This vector is indexed by the process number that the
 * value came from. The type @c T of the values may be any type that
 * is serializable or has an associated MPI data type.
 *
 * When the type @c T has an associated MPI data type, this routine
 * invokes @c MPI_Gather to gather the values.
 *
 * @param comm The communicator over which the gather will occur.
 *
 * @param in_value The value to be transmitted by each process. For
 * gathering arrays of values, @c in_values points to the @c n local
 * values to be transmitted.
 *
 * @param out_values A vector or pointer to storage that will be
 * populated with the values from each process, indexed by the
 * process ID number. If it is a vector, it will be resized
 * accordingly. For non-root processes, this parameter may be
 * omitted. If it is still provided, however, it will be unchanged.
 *
 * @param root The process ID number that will collect the
 * values. This value must be the same on all processes.
 */
template<typename T>
void
gather(const communicator& comm, const T& in_value, std::vector<T>& out_values,
       int root);

/**
 * \overload
 */
template<typename T>
void
gather(const communicator& comm, const T& in_value, T* out_values, int root);

/**
 * \overload
 */
template<typename T>
void gather(const communicator& comm, const T& in_value, int root);

/**
 * \overload
 */
template<typename T>
void
gather(const communicator& comm, const T* in_values, int n,
       std::vector<T>& out_values, int root);

/**
 * \overload
 */
template<typename T>
void
gather(const communicator& comm, const T* in_values, int n, T* out_values,
       int root);

/**
 * \overload
 */
template<typename T>
void gather(const communicator& comm, const T* in_values, int n, int root);
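
// Example usage (a minimal sketch, assuming an initialized
// boost::mpi::environment):
//
//   boost::mpi::communicator world;
//   std::vector<int> ranks;                      // only filled at the root
//   boost::mpi::gather(world, world.rank(), ranks, 0);
//   // On rank 0, ranks[i] == i; on other ranks, ranks is unchanged.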

/**
 * @brief Similar to boost::mpi::gather with the difference that the number
 * of values to be sent by non-root processes can vary.
 *
 * @param comm The communicator over which the gather will occur.
 *
 * @param in_values The array of values to be transmitted by each process.
 *
 * @param in_size For each non-root process this specifies the size
 * of @p in_values.
 *
 * @param out_values A pointer to storage that will be populated with
 * the values from each process. For non-root processes, this parameter
 * may be omitted. If it is still provided, however, it will be unchanged.
 *
 * @param sizes A vector containing the number of elements each non-root
 * process will send.
 *
 * @param displs A vector such that the i-th entry specifies the
 * displacement (relative to @p out_values) at which to place the incoming
 * data from process i at the @p root process. Overloaded versions for which
 * @p displs is omitted assume that the data is to be placed contiguously at
 * the root process.
 *
 * @param root The process ID number that will collect the
 * values. This value must be the same on all processes.
 */
template<typename T>
void
gatherv(const communicator& comm, const std::vector<T>& in_values,
        T* out_values, const std::vector<int>& sizes, const std::vector<int>& displs,
        int root);

/**
 * \overload
 */
template<typename T>
void
gatherv(const communicator& comm, const T* in_values, int in_size,
        T* out_values, const std::vector<int>& sizes, const std::vector<int>& displs,
        int root);

/**
 * \overload
 */
template<typename T>
void gatherv(const communicator& comm, const std::vector<T>& in_values, int root);

/**
 * \overload
 */
template<typename T>
void gatherv(const communicator& comm, const T* in_values, int in_size, int root);

/**
 * \overload
 */
template<typename T>
void
gatherv(const communicator& comm, const T* in_values, int in_size,
        T* out_values, const std::vector<int>& sizes, int root);

/**
 * \overload
 */
template<typename T>
void
gatherv(const communicator& comm, const std::vector<T>& in_values,
        T* out_values, const std::vector<int>& sizes, int root);
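
// Example usage (a minimal sketch, assuming an initialized
// boost::mpi::environment): each rank contributes (rank + 1) values,
// collected contiguously on rank 0.
//
//   boost::mpi::communicator world;
//   std::vector<int> mine(world.rank() + 1, world.rank());
//   if (world.rank() == 0) {
//     std::vector<int> sizes(world.size());
//     for (int i = 0; i < world.size(); ++i) sizes[i] = i + 1;
//     std::vector<int> all((world.size() * (world.size() + 1)) / 2);
//     boost::mpi::gatherv(world, mine, all.data(), sizes, 0);
//   } else {
//     boost::mpi::gatherv(world, mine, 0);
//   }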

/**
 * @brief Scatter the values stored at the root to all processes
 * within the communicator.
 *
 * @c scatter is a collective algorithm that scatters the values
 * stored in the @p root process (inside a vector) to all of the
 * processes in the communicator. The vector @p in_values (only
 * significant at the @p root) is indexed by the process number to
 * which the corresponding value will be sent. The type @c T of the
 * values may be any type that is serializable or has an associated
 * MPI data type.
 *
 * When the type @c T has an associated MPI data type, this routine
 * invokes @c MPI_Scatter to scatter the values.
 *
 * @param comm The communicator over which the scatter will occur.
 *
 * @param in_values A vector or pointer to storage that will contain
 * the values to send to each process, indexed by the process rank.
 * For non-root processes, this parameter may be omitted. If it is
 * still provided, however, it will be unchanged.
 *
 * @param out_value The value received by each process. When
 * scattering an array of values, @p out_values points to the @p n
 * values that will be received by each process.
 *
 * @param root The process ID number that will scatter the
 * values. This value must be the same on all processes.
 */
template<typename T>
void
scatter(const communicator& comm, const std::vector<T>& in_values, T& out_value,
        int root);

/**
 * \overload
 */
template<typename T>
void
scatter(const communicator& comm, const T* in_values, T& out_value, int root);

/**
 * \overload
 */
template<typename T>
void scatter(const communicator& comm, T& out_value, int root);

/**
 * \overload
 */
template<typename T>
void
scatter(const communicator& comm, const std::vector<T>& in_values,
        T* out_values, int n, int root);

/**
 * \overload
 */
template<typename T>
void
scatter(const communicator& comm, const T* in_values, T* out_values, int n,
        int root);

/**
 * \overload
 */
template<typename T>
void scatter(const communicator& comm, T* out_values, int n, int root);
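
// Example usage (a minimal sketch, assuming an initialized
// boost::mpi::environment):
//
//   boost::mpi::communicator world;
//   std::vector<int> squares;
//   if (world.rank() == 0) {
//     squares.resize(world.size());
//     for (int i = 0; i < world.size(); ++i) squares[i] = i * i;
//   }
//   int mine = 0;
//   boost::mpi::scatter(world, squares, mine, 0);
//   // Process i now holds i * i in mine.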

/**
 * @brief Similar to boost::mpi::scatter with the difference that the number
 * of values stored at the root process does not need to be a multiple of
 * the communicator's size.
 *
 * @param comm The communicator over which the scatter will occur.
 *
 * @param in_values A vector or pointer to storage that will contain
 * the values to send to each process, indexed by the process rank.
 * For non-root processes, this parameter may be omitted. If it is
 * still provided, however, it will be unchanged.
 *
 * @param sizes A vector containing the number of elements each non-root
 * process will receive.
 *
 * @param displs A vector such that the i-th entry specifies the
 * displacement (relative to @p in_values) from which to take the outgoing
 * data to process i. Overloaded versions for which @p displs is omitted
 * assume that the data is contiguous at the @p root process.
 *
 * @param out_values The array of values received by each process.
 *
 * @param out_size For each non-root process this specifies the size
 * of @p out_values.
 *
 * @param root The process ID number that will scatter the
 * values. This value must be the same on all processes.
 */
template<typename T>
void
scatterv(const communicator& comm, const std::vector<T>& in_values,
         const std::vector<int>& sizes, const std::vector<int>& displs,
         T* out_values, int out_size, int root);

/**
 * \overload
 */
template<typename T>
void
scatterv(const communicator& comm, const T* in_values,
         const std::vector<int>& sizes, const std::vector<int>& displs,
         T* out_values, int out_size, int root);

/**
 * \overload
 */
template<typename T>
void scatterv(const communicator& comm, T* out_values, int out_size, int root);

/**
 * \overload
 */
template<typename T>
void
scatterv(const communicator& comm, const T* in_values,
         const std::vector<int>& sizes, T* out_values, int root);

/**
 * \overload
 */
template<typename T>
void
scatterv(const communicator& comm, const std::vector<T>& in_values,
         const std::vector<int>& sizes, T* out_values, int root);
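
// Example usage (a minimal sketch, assuming an initialized
// boost::mpi::environment): rank 0 hands out (i + 1) values to rank i.
//
//   boost::mpi::communicator world;
//   int n = world.rank() + 1;
//   std::vector<int> mine(n);
//   if (world.rank() == 0) {
//     std::vector<int> sizes(world.size());
//     for (int i = 0; i < world.size(); ++i) sizes[i] = i + 1;
//     std::vector<int> all((world.size() * (world.size() + 1)) / 2, 42);
//     boost::mpi::scatterv(world, all, sizes, mine.data(), 0);
//   } else {
//     boost::mpi::scatterv(world, mine.data(), n, 0);
//   }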

/**
 * @brief Combine the values stored by each process into a single
 * value at the root.
 *
 * @c reduce is a collective algorithm that combines the values
 * stored by each process into a single value at the @c root. The
 * values can be combined arbitrarily, specified via a function
 * object. The type @c T of the values may be any type that is
 * serializable or has an associated MPI data type. One can think of
 * this operation as a @c gather to the @p root, followed by an @c
 * std::accumulate() over the gathered values using the operation
 * @c op.
 *
 * When the type @c T has an associated MPI data type, this routine
 * invokes @c MPI_Reduce to perform the reduction. If possible,
 * built-in MPI operations will be used; otherwise, @c reduce() will
 * create a custom @c MPI_Op for the call to @c MPI_Reduce.
 *
 * @param comm The communicator over which the reduction will
 * occur.
 *
 * @param in_value The local value to be combined with the local
 * values of every other process. For reducing arrays, @c in_values
 * contains a pointer to the local values. In this case, @c n is
 * the number of values that will be reduced. Reduction occurs
 * independently for each of the @p n values referenced by @p
 * in_values, e.g., calling reduce on an array of @p n values is
 * like calling @c reduce @p n separate times, one for each
 * location in @p in_values and @p out_values.
 *
 * @param out_value Will receive the result of the reduction
 * operation, but only for the @p root process. Non-root processes
 * may omit this parameter; if they choose to supply the parameter,
 * it will be unchanged. For reducing arrays, @c out_values
 * contains a pointer to the storage for the output values.
 *
 * @param op The binary operation that combines two values of type
 * @c T into a third value of type @c T. For types @c T that have
 * associated MPI data types, @c op will either be translated into
 * an @c MPI_Op (via @c MPI_Op_create) or, if possible, mapped
 * directly to a built-in MPI operation. See @c is_mpi_op in the @c
 * operations.hpp header for more details on this mapping. For any
 * non-built-in operation, commutativity will be determined by the
 * @c is_commutative trait (also in @c operations.hpp): users are
 * encouraged to mark commutative operations as such, because it
 * gives the implementation additional latitude to optimize the
 * reduction operation.
 *
 * @param root The process ID number that will receive the final,
 * combined value. This value must be the same on all processes.
 */
template<typename T, typename Op>
void
reduce(const communicator& comm, const T& in_value, T& out_value, Op op,
       int root);

/**
 * \overload
 */
template<typename T, typename Op>
void reduce(const communicator& comm, const T& in_value, Op op, int root);

/**
 * \overload
 */
template<typename T, typename Op>
void
reduce(const communicator& comm, const T* in_values, int n, T* out_values,
       Op op, int root);

/**
 * \overload
 */
template<typename T, typename Op>
void
reduce(const communicator& comm, const T* in_values, int n, Op op, int root);
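
// Example usage (a minimal sketch, assuming an initialized
// boost::mpi::environment; boost::mpi::maximum comes from operations.hpp):
//
//   boost::mpi::communicator world;
//   int max_rank = 0;
//   boost::mpi::reduce(world, world.rank(), max_rank,
//                      boost::mpi::maximum<int>(), 0);
//   // On rank 0, max_rank == world.size() - 1; elsewhere it is unchanged.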

/**
 * @brief Compute a prefix reduction of values from all processes in
 * the communicator.
 *
 * @c scan is a collective algorithm that combines the values stored
 * by each process with the values of all processes with a smaller
 * rank. The values can be arbitrarily combined, specified via a
 * function object @p op. The type @c T of the values may be any type
 * that is serializable or has an associated MPI data type. One can
 * think of this operation as a @c gather to some process, followed
 * by an @c std::partial_sum() over the gathered values using the
 * operation @c op. The ith process returns the ith value emitted by
 * @c std::partial_sum().
 *
 * When the type @c T has an associated MPI data type, this routine
 * invokes @c MPI_Scan to perform the reduction. If possible,
 * built-in MPI operations will be used; otherwise, @c scan() will
 * create a custom @c MPI_Op for the call to @c MPI_Scan.
 *
 * @param comm The communicator over which the prefix reduction
 * will occur.
 *
 * @param in_value The local value to be combined with the local
 * values of other processes. For the array variant, the @c
 * in_values parameter points to the @c n local values that will be
 * combined.
 *
 * @param out_value If provided, the ith process will receive the
 * value @c op(in_value[0], op(in_value[1], op(..., in_value[i])
 * ... )). For the array variant, @c out_values contains a pointer
 * to storage for the @c n output values. The prefix reduction
 * occurs independently for each of the @p n values referenced by
 * @p in_values, e.g., calling scan on an array of @p n values is
 * like calling @c scan @p n separate times, one for each location
 * in @p in_values and @p out_values.
 *
 * @param op The binary operation that combines two values of type
 * @c T into a third value of type @c T. For types @c T that have
 * associated MPI data types, @c op will either be translated into
 * an @c MPI_Op (via @c MPI_Op_create) or, if possible, mapped
 * directly to a built-in MPI operation. See @c is_mpi_op in the @c
 * operations.hpp header for more details on this mapping. For any
 * non-built-in operation, commutativity will be determined by the
 * @c is_commutative trait (also in @c operations.hpp).
 *
 * @returns If no @p out_value parameter is provided, returns the
 * result of prefix reduction.
 */
template<typename T, typename Op>
void
scan(const communicator& comm, const T& in_value, T& out_value, Op op);

/**
 * \overload
 */
template<typename T, typename Op>
T
scan(const communicator& comm, const T& in_value, Op op);

/**
 * \overload
 */
template<typename T, typename Op>
void
scan(const communicator& comm, const T* in_values, int n, T* out_values, Op op);
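
// Example usage (a minimal sketch, assuming an initialized
// boost::mpi::environment and <functional> for std::plus):
//
//   boost::mpi::communicator world;
//   int prefix = boost::mpi::scan(world, 1, std::plus<int>());
//   // Inclusive prefix sum of 1 over ranks 0..i, so prefix == i + 1.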

} } // end namespace boost::mpi
#endif // BOOST_MPI_COLLECTIVES_HPP

#ifndef BOOST_MPI_COLLECTIVES_FORWARD_ONLY
// Include implementations of each of the collectives
# include <boost/mpi/collectives/all_gather.hpp>
# include <boost/mpi/collectives/all_reduce.hpp>
# include <boost/mpi/collectives/all_to_all.hpp>
# include <boost/mpi/collectives/broadcast.hpp>
# include <boost/mpi/collectives/gather.hpp>
# include <boost/mpi/collectives/gatherv.hpp>
# include <boost/mpi/collectives/scatter.hpp>
# include <boost/mpi/collectives/scatterv.hpp>
# include <boost/mpi/collectives/reduce.hpp>
# include <boost/mpi/collectives/scan.hpp>
#endif