]> git.proxmox.com Git - ceph.git/blob - ceph/src/boost/libs/compute/include/boost/compute/random/threefry_engine.hpp
bump version to 12.2.2-pve1
[ceph.git] / ceph / src / boost / libs / compute / include / boost / compute / random / threefry_engine.hpp
1 //---------------------------------------------------------------------------//
2 // Copyright (c) 2015 Muhammad Junaid Muzammil <mjunaidmuzammil@gmail.com>
3 //
4 // Distributed under the Boost Software License, Version 1.0
5 // See accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt
7 //
8 // See http://boostorg.github.com/compute for more information.
9 //---------------------------------------------------------------------------//
10
11 #ifndef BOOST_COMPUTE_RANDOM_THREEFRY_HPP
12 #define BOOST_COMPUTE_RANDOM_THREEFRY_HPP
13
14 #include <algorithm>
15
16 #include <boost/compute/types.hpp>
17 #include <boost/compute/buffer.hpp>
18 #include <boost/compute/kernel.hpp>
19 #include <boost/compute/context.hpp>
20 #include <boost/compute/program.hpp>
21 #include <boost/compute/command_queue.hpp>
22 #include <boost/compute/algorithm/transform.hpp>
23 #include <boost/compute/detail/iterator_range_size.hpp>
24 #include <boost/compute/utility/program_cache.hpp>
25 #include <boost/compute/container/vector.hpp>
26 #include <boost/compute/iterator/discard_iterator.hpp>
27
28 namespace boost {
29 namespace compute {
30
31 /// \class threefry_engine
32 /// \brief Threefry pseudorandom number generator.
33 template<class T = uint_>
34 class threefry_engine
35 {
36 public:
37 static const size_t threads = 1024;
38 typedef T result_type;
39
40 /// Creates a new threefry_engine and seeds it with \p value.
41 explicit threefry_engine(command_queue &queue)
42 : m_context(queue.get_context())
43 {
44 // setup program
45 load_program();
46 }
47
48 /// Creates a new threefry_engine object as a copy of \p other.
49 threefry_engine(const threefry_engine<T> &other)
50 : m_context(other.m_context),
51 m_program(other.m_program)
52 {
53 }
54
55 /// Copies \p other to \c *this.
56 threefry_engine<T>& operator=(const threefry_engine<T> &other)
57 {
58 if(this != &other){
59 m_context = other.m_context;
60 m_program = other.m_program;
61 }
62
63 return *this;
64 }
65
66 /// Destroys the threefry_engine object.
67 ~threefry_engine()
68 {
69 }
70
71 private:
72 /// \internal_
73 void load_program()
74 {
75 boost::shared_ptr<program_cache> cache =
76 program_cache::get_global_cache(m_context);
77 std::string cache_key =
78 std::string("threefry_engine_32x2");
79
80 // Copyright 2010-2012, D. E. Shaw Research.
81 // All rights reserved.
82
83 // Redistribution and use in source and binary forms, with or without
84 // modification, are permitted provided that the following conditions are
85 // met:
86
87 // * Redistributions of source code must retain the above copyright
88 // notice, this list of conditions, and the following disclaimer.
89
90 // * Redistributions in binary form must reproduce the above copyright
91 // notice, this list of conditions, and the following disclaimer in the
92 // documentation and/or other materials provided with the distribution.
93
94 // * Neither the name of D. E. Shaw Research nor the names of its
95 // contributors may be used to endorse or promote products derived from
96 // this software without specific prior written permission.
97
98 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
99 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
100 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
101 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
102 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
103 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
104 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
105 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
106 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
107 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
108 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
109 const char source[] =
110 "#define THREEFRY2x32_DEFAULT_ROUNDS 20\n"
111 "#define SKEIN_KS_PARITY_32 0x1BD11BDA\n"
112
113 "enum r123_enum_threefry32x2 {\n"
114 " R_32x2_0_0=13,\n"
115 " R_32x2_1_0=15,\n"
116 " R_32x2_2_0=26,\n"
117 " R_32x2_3_0= 6,\n"
118 " R_32x2_4_0=17,\n"
119 " R_32x2_5_0=29,\n"
120 " R_32x2_6_0=16,\n"
121 " R_32x2_7_0=24\n"
122 "};\n"
123
124 "static uint RotL_32(uint x, uint N)\n"
125 "{\n"
126 " return (x << (N & 31)) | (x >> ((32-N) & 31));\n"
127 "}\n"
128
129 "struct r123array2x32 {\n"
130 " uint v[2];\n"
131 "};\n"
132 "typedef struct r123array2x32 threefry2x32_ctr_t;\n"
133 "typedef struct r123array2x32 threefry2x32_key_t;\n"
134
135 "threefry2x32_ctr_t threefry2x32_R(unsigned int Nrounds, threefry2x32_ctr_t in, threefry2x32_key_t k)\n"
136 "{\n"
137 " threefry2x32_ctr_t X;\n"
138 " uint ks[3];\n"
139 " uint i; \n"
140 " ks[2] = SKEIN_KS_PARITY_32;\n"
141 " for (i=0;i < 2; i++) {\n"
142 " ks[i] = k.v[i];\n"
143 " X.v[i] = in.v[i];\n"
144 " ks[2] ^= k.v[i];\n"
145 " }\n"
146 " X.v[0] += ks[0]; X.v[1] += ks[1];\n"
147 " if(Nrounds>0){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n"
148 " if(Nrounds>1){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n"
149 " if(Nrounds>2){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n"
150 " if(Nrounds>3){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n"
151 " if(Nrounds>3){\n"
152 " X.v[0] += ks[1]; X.v[1] += ks[2];\n"
153 " X.v[1] += 1;\n"
154 " }\n"
155 " if(Nrounds>4){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n"
156 " if(Nrounds>5){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n"
157 " if(Nrounds>6){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n"
158 " if(Nrounds>7){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n"
159 " if(Nrounds>7){\n"
160 " X.v[0] += ks[2]; X.v[1] += ks[0];\n"
161 " X.v[1] += 2;\n"
162 " }\n"
163 " if(Nrounds>8){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n"
164 " if(Nrounds>9){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n"
165 " if(Nrounds>10){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n"
166 " if(Nrounds>11){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n"
167 " if(Nrounds>11){\n"
168 " X.v[0] += ks[0]; X.v[1] += ks[1];\n"
169 " X.v[1] += 3;\n"
170 " }\n"
171 " if(Nrounds>12){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n"
172 " if(Nrounds>13){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n"
173 " if(Nrounds>14){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n"
174 " if(Nrounds>15){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n"
175 " if(Nrounds>15){\n"
176 " X.v[0] += ks[1]; X.v[1] += ks[2];\n"
177 " X.v[1] += 4;\n"
178 " }\n"
179 " if(Nrounds>16){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n"
180 " if(Nrounds>17){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n"
181 " if(Nrounds>18){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n"
182 " if(Nrounds>19){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n"
183 " if(Nrounds>19){\n"
184 " X.v[0] += ks[2]; X.v[1] += ks[0];\n"
185 " X.v[1] += 5;\n"
186 " }\n"
187 " if(Nrounds>20){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n"
188 " if(Nrounds>21){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n"
189 " if(Nrounds>22){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n"
190 " if(Nrounds>23){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n"
191 " if(Nrounds>23){\n"
192 " X.v[0] += ks[0]; X.v[1] += ks[1];\n"
193 " X.v[1] += 6;\n"
194 " }\n"
195 " if(Nrounds>24){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n"
196 " if(Nrounds>25){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n"
197 " if(Nrounds>26){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n"
198 " if(Nrounds>27){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n"
199 " if(Nrounds>27){\n"
200 " X.v[0] += ks[1]; X.v[1] += ks[2];\n"
201 " X.v[1] += 7;\n"
202 " }\n"
203 " if(Nrounds>28){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n"
204 " if(Nrounds>29){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n"
205 " if(Nrounds>30){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n"
206 " if(Nrounds>31){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n"
207 " if(Nrounds>31){\n"
208 " X.v[0] += ks[2]; X.v[1] += ks[0];\n"
209 " X.v[1] += 8;\n"
210 " }\n"
211 " return X;\n"
212 "}\n"
213
214 "__kernel void generate_rng(__global uint *ctr, __global uint *key, const uint offset) {\n"
215 " threefry2x32_ctr_t in;\n"
216 " threefry2x32_key_t k;\n"
217 " const uint i = get_global_id(0);\n"
218 " in.v[0] = ctr[2 * (offset + i)];\n"
219 " in.v[1] = ctr[2 * (offset + i) + 1];\n"
220 " k.v[0] = key[2 * (offset + i)];\n"
221 " k.v[1] = key[2 * (offset + i) + 1];\n"
222 " in = threefry2x32_R(20, in, k);\n"
223 " ctr[2 * (offset + i)] = in.v[0];\n"
224 " ctr[2 * (offset + i) + 1] = in.v[1];\n"
225 "}\n";
226
227 m_program = cache->get_or_build(cache_key, std::string(), source, m_context);
228 }
229
230 public:
231
232
233 /// Generates Threefry random numbers using both the counter and key values, and then stores
234 /// them to the range [\p first_ctr, \p last_ctr).
235 template<class OutputIterator>
236 void generate(OutputIterator first_ctr, OutputIterator last_ctr, OutputIterator first_key, OutputIterator last_key, command_queue &queue) {
237 const size_t size_ctr = detail::iterator_range_size(first_ctr, last_ctr);
238 const size_t size_key = detail::iterator_range_size(first_key, last_key);
239 if(!size_ctr || !size_key || (size_ctr != size_key)) {
240 return;
241 }
242 kernel rng_kernel = m_program.create_kernel("generate_rng");
243
244 rng_kernel.set_arg(0, first_ctr.get_buffer());
245 rng_kernel.set_arg(1, first_key.get_buffer());
246 size_t offset = 0;
247
248 for(;;){
249 size_t count = 0;
250 size_t size = size_ctr/2;
251 if(size > threads){
252 count = (std::min)(static_cast<size_t>(threads), size - offset);
253 }
254 else {
255 count = size;
256 }
257 rng_kernel.set_arg(2, static_cast<const uint_>(offset));
258 queue.enqueue_1d_range_kernel(rng_kernel, 0, count, 0);
259
260 offset += count;
261
262 if(offset >= size){
263 break;
264 }
265
266 }
267 }
268
269 template<class OutputIterator>
270 void generate(OutputIterator first_ctr, OutputIterator last_ctr, command_queue &queue) {
271 const size_t size_ctr = detail::iterator_range_size(first_ctr, last_ctr);
272 if(!size_ctr) {
273 return;
274 }
275 boost::compute::vector<uint_> vector_key(size_ctr, m_context);
276 vector_key.assign(size_ctr, 0, queue);
277 kernel rng_kernel = m_program.create_kernel("generate_rng");
278
279 rng_kernel.set_arg(0, first_ctr.get_buffer());
280 rng_kernel.set_arg(1, vector_key);
281 size_t offset = 0;
282
283 for(;;){
284 size_t count = 0;
285 size_t size = size_ctr/2;
286 if(size > threads){
287 count = (std::min)(static_cast<size_t>(threads), size - offset);
288 }
289 else {
290 count = size;
291 }
292 rng_kernel.set_arg(2, static_cast<const uint_>(offset));
293 queue.enqueue_1d_range_kernel(rng_kernel, 0, count, 0);
294
295 offset += count;
296
297 if(offset >= size){
298 break;
299 }
300
301 }
302 }
303 private:
304 context m_context;
305 program m_program;
306 };
307
308 } // end compute namespace
309 } // end boost namespace
310
311 #endif // BOOST_COMPUTE_RANDOM_THREEFRY_HPP