]> git.proxmox.com Git - ceph.git/blob - ceph/src/boost/boost/compute/random/threefry_engine.hpp
import new upstream nautilus stable release 14.2.8
[ceph.git] / ceph / src / boost / boost / compute / random / threefry_engine.hpp
1 //---------------------------------------------------------------------------//
2 // Copyright (c) 2015 Muhammad Junaid Muzammil <mjunaidmuzammil@gmail.com>
3 //
4 // Distributed under the Boost Software License, Version 1.0
5 // See accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt
7 //
8 // See http://boostorg.github.com/compute for more information.
9 //---------------------------------------------------------------------------//
10
11 #ifndef BOOST_COMPUTE_RANDOM_THREEFRY_HPP
12 #define BOOST_COMPUTE_RANDOM_THREEFRY_HPP
13
14 #include <algorithm>
15
16 #include <boost/compute/types.hpp>
17 #include <boost/compute/buffer.hpp>
18 #include <boost/compute/kernel.hpp>
19 #include <boost/compute/context.hpp>
20 #include <boost/compute/program.hpp>
21 #include <boost/compute/command_queue.hpp>
22 #include <boost/compute/algorithm/transform.hpp>
23 #include <boost/compute/detail/iterator_range_size.hpp>
24 #include <boost/compute/utility/program_cache.hpp>
25 #include <boost/compute/container/vector.hpp>
26 #include <boost/compute/iterator/discard_iterator.hpp>
27
28 namespace boost {
29 namespace compute {
30
31 /// \class threefry_engine
32 /// \brief Threefry pseudorandom number generator.
33 template<class T = uint_>
34 class threefry_engine
35 {
36 public:
37 typedef T result_type;
38 static const ulong_ default_seed = 0UL;
39
40 /// Creates a new threefry_engine and seeds it with \p value.
41 explicit threefry_engine(command_queue &queue,
42 ulong_ value = default_seed)
43 : m_key(value),
44 m_counter(0),
45 m_context(queue.get_context())
46 {
47 // Load program
48 load_program();
49 }
50
51 /// Creates a new threefry_engine object as a copy of \p other.
52 threefry_engine(const threefry_engine<T> &other)
53 : m_key(other.m_key),
54 m_counter(other.m_counter),
55 m_context(other.m_context),
56 m_program(other.m_program)
57 {
58 }
59
60 /// Copies \p other to \c *this.
61 threefry_engine<T>& operator=(const threefry_engine<T> &other)
62 {
63 if(this != &other){
64 m_key = other.m_key;
65 m_counter = other.m_counter;
66 m_context = other.m_context;
67 m_program = other.m_program;
68 }
69
70 return *this;
71 }
72
73 /// Destroys the threefry_engine object.
74 ~threefry_engine()
75 {
76 }
77
78 /// Seeds the random number generator with \p value.
79 ///
80 /// \param value seed value for the random-number generator
81 /// \param queue command queue to perform the operation
82 ///
83 /// If no seed value is provided, \c default_seed is used.
84 void seed(ulong_ value, command_queue &queue)
85 {
86 (void) queue;
87 m_key = value;
88 // Reset counter
89 m_counter = 0;
90 }
91
92 /// \overload
93 void seed(command_queue &queue)
94 {
95 seed(default_seed, queue);
96 }
97
98 /// Generates random numbers and stores them to the range [\p first, \p last).
99 template<class OutputIterator>
100 void generate(OutputIterator first, OutputIterator last, command_queue &queue)
101 {
102 const size_t size = detail::iterator_range_size(first, last);
103
104 kernel fill_kernel(m_program, "fill");
105 fill_kernel.set_arg(0, first.get_buffer());
106 fill_kernel.set_arg(1, static_cast<const uint_>(size));
107 fill_kernel.set_arg(2, m_key);
108 fill_kernel.set_arg(3, m_counter);
109
110 queue.enqueue_1d_range_kernel(fill_kernel, 0, (size + 1)/2, 0);
111
112 discard(size, queue);
113 }
114
115 /// \internal_
116 void generate(discard_iterator first, discard_iterator last, command_queue &queue)
117 {
118 (void) queue;
119 ulong_ offset = std::distance(first, last);
120 m_counter += offset;
121 }
122
123 /// Generates random numbers, transforms them with \p op, and then stores
124 /// them to the range [\p first, \p last).
125 template<class OutputIterator, class Function>
126 void generate(OutputIterator first, OutputIterator last, Function op, command_queue &queue)
127 {
128 vector<T> tmp(std::distance(first, last), queue.get_context());
129 generate(tmp.begin(), tmp.end(), queue);
130 ::boost::compute::transform(tmp.begin(), tmp.end(), first, op, queue);
131 }
132
133 /// Generates \p z random numbers and discards them.
134 void discard(size_t z, command_queue &queue)
135 {
136 generate(discard_iterator(0), discard_iterator(z), queue);
137 }
138
139 private:
140 void load_program()
141 {
142 boost::shared_ptr<program_cache> cache =
143 program_cache::get_global_cache(m_context);
144 std::string cache_key =
145 std::string("__boost_threefry_engine_32x2");
146
147 // Copyright 2010-2012, D. E. Shaw Research.
148 // All rights reserved.
149
150 // Redistribution and use in source and binary forms, with or without
151 // modification, are permitted provided that the following conditions are
152 // met:
153
154 // * Redistributions of source code must retain the above copyright
155 // notice, this list of conditions, and the following disclaimer.
156
157 // * Redistributions in binary form must reproduce the above copyright
158 // notice, this list of conditions, and the following disclaimer in the
159 // documentation and/or other materials provided with the distribution.
160
161 // * Neither the name of D. E. Shaw Research nor the names of its
162 // contributors may be used to endorse or promote products derived from
163 // this software without specific prior written permission.
164
165 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
166 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
167 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
168 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
169 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
170 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
171 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
172 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
173 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
174 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
175 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
176 const char source[] =
177 "#define THREEFRY2x32_DEFAULT_ROUNDS 20\n"
178 "#define SKEIN_KS_PARITY_32 0x1BD11BDA\n"
179
180 "enum r123_enum_threefry32x2 {\n"
181 " R_32x2_0_0=13,\n"
182 " R_32x2_1_0=15,\n"
183 " R_32x2_2_0=26,\n"
184 " R_32x2_3_0= 6,\n"
185 " R_32x2_4_0=17,\n"
186 " R_32x2_5_0=29,\n"
187 " R_32x2_6_0=16,\n"
188 " R_32x2_7_0=24\n"
189 "};\n"
190
191 "static uint RotL_32(uint x, uint N)\n"
192 "{\n"
193 " return (x << (N & 31)) | (x >> ((32-N) & 31));\n"
194 "}\n"
195
196 "struct r123array2x32 {\n"
197 " uint v[2];\n"
198 "};\n"
199 "typedef struct r123array2x32 threefry2x32_ctr_t;\n"
200 "typedef struct r123array2x32 threefry2x32_key_t;\n"
201
202 "threefry2x32_ctr_t threefry2x32_R(unsigned int Nrounds, threefry2x32_ctr_t in, threefry2x32_key_t k)\n"
203 "{\n"
204 " threefry2x32_ctr_t X;\n"
205 " uint ks[3];\n"
206 " uint i; \n"
207 " ks[2] = SKEIN_KS_PARITY_32;\n"
208 " for (i=0;i < 2; i++) {\n"
209 " ks[i] = k.v[i];\n"
210 " X.v[i] = in.v[i];\n"
211 " ks[2] ^= k.v[i];\n"
212 " }\n"
213 " X.v[0] += ks[0]; X.v[1] += ks[1];\n"
214 " if(Nrounds>0){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n"
215 " if(Nrounds>1){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n"
216 " if(Nrounds>2){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n"
217 " if(Nrounds>3){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n"
218 " if(Nrounds>3){\n"
219 " X.v[0] += ks[1]; X.v[1] += ks[2];\n"
220 " X.v[1] += 1;\n"
221 " }\n"
222 " if(Nrounds>4){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n"
223 " if(Nrounds>5){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n"
224 " if(Nrounds>6){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n"
225 " if(Nrounds>7){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n"
226 " if(Nrounds>7){\n"
227 " X.v[0] += ks[2]; X.v[1] += ks[0];\n"
228 " X.v[1] += 2;\n"
229 " }\n"
230 " if(Nrounds>8){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n"
231 " if(Nrounds>9){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n"
232 " if(Nrounds>10){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n"
233 " if(Nrounds>11){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n"
234 " if(Nrounds>11){\n"
235 " X.v[0] += ks[0]; X.v[1] += ks[1];\n"
236 " X.v[1] += 3;\n"
237 " }\n"
238 " if(Nrounds>12){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n"
239 " if(Nrounds>13){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n"
240 " if(Nrounds>14){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n"
241 " if(Nrounds>15){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n"
242 " if(Nrounds>15){\n"
243 " X.v[0] += ks[1]; X.v[1] += ks[2];\n"
244 " X.v[1] += 4;\n"
245 " }\n"
246 " if(Nrounds>16){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n"
247 " if(Nrounds>17){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n"
248 " if(Nrounds>18){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n"
249 " if(Nrounds>19){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n"
250 " if(Nrounds>19){\n"
251 " X.v[0] += ks[2]; X.v[1] += ks[0];\n"
252 " X.v[1] += 5;\n"
253 " }\n"
254 " if(Nrounds>20){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n"
255 " if(Nrounds>21){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n"
256 " if(Nrounds>22){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n"
257 " if(Nrounds>23){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n"
258 " if(Nrounds>23){\n"
259 " X.v[0] += ks[0]; X.v[1] += ks[1];\n"
260 " X.v[1] += 6;\n"
261 " }\n"
262 " if(Nrounds>24){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n"
263 " if(Nrounds>25){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n"
264 " if(Nrounds>26){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n"
265 " if(Nrounds>27){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n"
266 " if(Nrounds>27){\n"
267 " X.v[0] += ks[1]; X.v[1] += ks[2];\n"
268 " X.v[1] += 7;\n"
269 " }\n"
270 " if(Nrounds>28){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n"
271 " if(Nrounds>29){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n"
272 " if(Nrounds>30){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n"
273 " if(Nrounds>31){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n"
274 " if(Nrounds>31){\n"
275 " X.v[0] += ks[2]; X.v[1] += ks[0];\n"
276 " X.v[1] += 8;\n"
277 " }\n"
278 " return X;\n"
279 "}\n"
280 "__kernel void fill(__global uint * output,\n"
281 " const uint output_size,\n"
282 " const uint2 key,\n"
283 " const uint2 counter)\n"
284 "{\n"
285 " uint gid = get_global_id(0);\n"
286 " threefry2x32_ctr_t c;\n"
287 " c.v[0] = counter.x + gid;\n"
288 " c.v[1] = counter.y + (c.v[0] < counter.x ? 1 : 0);\n"
289 "\n"
290 " threefry2x32_key_t k = { {key.x, key.y} };\n"
291 "\n"
292 " threefry2x32_ctr_t result;\n"
293 " result = threefry2x32_R(THREEFRY2x32_DEFAULT_ROUNDS, c, k);\n"
294 "\n"
295 " if(gid < output_size/2)\n"
296 " {\n"
297 " output[2 * gid] = result.v[0];\n"
298 " output[2 * gid + 1] = result.v[1];\n"
299 " }\n"
300 " else if(gid < (output_size+1)/2)\n"
301 " output[2 * gid] = result.v[0];\n"
302 "}\n";
303
304 m_program = cache->get_or_build(cache_key, std::string(), source, m_context);
305 }
306
307 // Engine state
308 ulong_ m_key; // 2 x 32bit
309 ulong_ m_counter;
310 // OpenCL
311 context m_context;
312 program m_program;
313 };
314
315 } // end compute namespace
316 } // end boost namespace
317
318 #endif // BOOST_COMPUTE_RANDOM_THREEFRY_HPP