]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | //---------------------------------------------------------------------------// |
2 | // Copyright (c) 2015 Muhammad Junaid Muzammil <mjunaidmuzammil@gmail.com> | |
3 | // | |
4 | // Distributed under the Boost Software License, Version 1.0 | |
5 | // See accompanying file LICENSE_1_0.txt or copy at | |
6 | // http://www.boost.org/LICENSE_1_0.txt | |
7 | // | |
8 | // See http://boostorg.github.com/compute for more information. | |
9 | //---------------------------------------------------------------------------// | |
10 | ||
11 | #ifndef BOOST_COMPUTE_RANDOM_THREEFRY_HPP | |
12 | #define BOOST_COMPUTE_RANDOM_THREEFRY_HPP | |
13 | ||
14 | #include <algorithm> | |
15 | ||
16 | #include <boost/compute/types.hpp> | |
17 | #include <boost/compute/buffer.hpp> | |
18 | #include <boost/compute/kernel.hpp> | |
19 | #include <boost/compute/context.hpp> | |
20 | #include <boost/compute/program.hpp> | |
21 | #include <boost/compute/command_queue.hpp> | |
22 | #include <boost/compute/algorithm/transform.hpp> | |
23 | #include <boost/compute/detail/iterator_range_size.hpp> | |
24 | #include <boost/compute/utility/program_cache.hpp> | |
25 | #include <boost/compute/container/vector.hpp> | |
26 | #include <boost/compute/iterator/discard_iterator.hpp> | |
27 | ||
28 | namespace boost { | |
29 | namespace compute { | |
30 | ||
31 | /// \class threefry_engine | |
32 | /// \brief Threefry pseudorandom number generator. | |
33 | template<class T = uint_> | |
34 | class threefry_engine | |
35 | { | |
36 | public: | |
7c673cae | 37 | typedef T result_type; |
92f5a8d4 | 38 | static const ulong_ default_seed = 0UL; |
7c673cae FG |
39 | |
40 | /// Creates a new threefry_engine and seeds it with \p value. | |
92f5a8d4 TL |
41 | explicit threefry_engine(command_queue &queue, |
42 | ulong_ value = default_seed) | |
43 | : m_key(value), | |
44 | m_counter(0), | |
45 | m_context(queue.get_context()) | |
7c673cae | 46 | { |
92f5a8d4 | 47 | // Load program |
7c673cae FG |
48 | load_program(); |
49 | } | |
50 | ||
51 | /// Creates a new threefry_engine object as a copy of \p other. | |
52 | threefry_engine(const threefry_engine<T> &other) | |
92f5a8d4 TL |
53 | : m_key(other.m_key), |
54 | m_counter(other.m_counter), | |
55 | m_context(other.m_context), | |
7c673cae FG |
56 | m_program(other.m_program) |
57 | { | |
58 | } | |
59 | ||
60 | /// Copies \p other to \c *this. | |
61 | threefry_engine<T>& operator=(const threefry_engine<T> &other) | |
62 | { | |
63 | if(this != &other){ | |
92f5a8d4 TL |
64 | m_key = other.m_key; |
65 | m_counter = other.m_counter; | |
7c673cae FG |
66 | m_context = other.m_context; |
67 | m_program = other.m_program; | |
68 | } | |
69 | ||
70 | return *this; | |
71 | } | |
72 | ||
73 | /// Destroys the threefry_engine object. | |
74 | ~threefry_engine() | |
75 | { | |
76 | } | |
77 | ||
92f5a8d4 TL |
78 | /// Seeds the random number generator with \p value. |
79 | /// | |
80 | /// \param value seed value for the random-number generator | |
81 | /// \param queue command queue to perform the operation | |
82 | /// | |
83 | /// If no seed value is provided, \c default_seed is used. | |
84 | void seed(ulong_ value, command_queue &queue) | |
85 | { | |
86 | (void) queue; | |
87 | m_key = value; | |
88 | // Reset counter | |
89 | m_counter = 0; | |
90 | } | |
91 | ||
92 | /// \overload | |
93 | void seed(command_queue &queue) | |
94 | { | |
95 | seed(default_seed, queue); | |
96 | } | |
97 | ||
98 | /// Generates random numbers and stores them to the range [\p first, \p last). | |
99 | template<class OutputIterator> | |
100 | void generate(OutputIterator first, OutputIterator last, command_queue &queue) | |
101 | { | |
102 | const size_t size = detail::iterator_range_size(first, last); | |
103 | ||
104 | kernel fill_kernel(m_program, "fill"); | |
105 | fill_kernel.set_arg(0, first.get_buffer()); | |
106 | fill_kernel.set_arg(1, static_cast<const uint_>(size)); | |
107 | fill_kernel.set_arg(2, m_key); | |
108 | fill_kernel.set_arg(3, m_counter); | |
109 | ||
110 | queue.enqueue_1d_range_kernel(fill_kernel, 0, (size + 1)/2, 0); | |
111 | ||
112 | discard(size, queue); | |
113 | } | |
114 | ||
7c673cae | 115 | /// \internal_ |
92f5a8d4 TL |
116 | void generate(discard_iterator first, discard_iterator last, command_queue &queue) |
117 | { | |
118 | (void) queue; | |
119 | ulong_ offset = std::distance(first, last); | |
120 | m_counter += offset; | |
121 | } | |
122 | ||
123 | /// Generates random numbers, transforms them with \p op, and then stores | |
124 | /// them to the range [\p first, \p last). | |
125 | template<class OutputIterator, class Function> | |
126 | void generate(OutputIterator first, OutputIterator last, Function op, command_queue &queue) | |
127 | { | |
128 | vector<T> tmp(std::distance(first, last), queue.get_context()); | |
129 | generate(tmp.begin(), tmp.end(), queue); | |
130 | ::boost::compute::transform(tmp.begin(), tmp.end(), first, op, queue); | |
131 | } | |
132 | ||
133 | /// Generates \p z random numbers and discards them. | |
134 | void discard(size_t z, command_queue &queue) | |
135 | { | |
136 | generate(discard_iterator(0), discard_iterator(z), queue); | |
137 | } | |
138 | ||
139 | private: | |
7c673cae FG |
140 | void load_program() |
141 | { | |
142 | boost::shared_ptr<program_cache> cache = | |
143 | program_cache::get_global_cache(m_context); | |
144 | std::string cache_key = | |
92f5a8d4 | 145 | std::string("__boost_threefry_engine_32x2"); |
7c673cae FG |
146 | |
147 | // Copyright 2010-2012, D. E. Shaw Research. | |
148 | // All rights reserved. | |
149 | ||
150 | // Redistribution and use in source and binary forms, with or without | |
151 | // modification, are permitted provided that the following conditions are | |
152 | // met: | |
153 | ||
154 | // * Redistributions of source code must retain the above copyright | |
155 | // notice, this list of conditions, and the following disclaimer. | |
156 | ||
157 | // * Redistributions in binary form must reproduce the above copyright | |
158 | // notice, this list of conditions, and the following disclaimer in the | |
159 | // documentation and/or other materials provided with the distribution. | |
160 | ||
161 | // * Neither the name of D. E. Shaw Research nor the names of its | |
162 | // contributors may be used to endorse or promote products derived from | |
163 | // this software without specific prior written permission. | |
164 | ||
165 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
166 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
167 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
168 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
169 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
170 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
171 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
172 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
173 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
174 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
175 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
176 | const char source[] = | |
177 | "#define THREEFRY2x32_DEFAULT_ROUNDS 20\n" | |
178 | "#define SKEIN_KS_PARITY_32 0x1BD11BDA\n" | |
179 | ||
180 | "enum r123_enum_threefry32x2 {\n" | |
181 | " R_32x2_0_0=13,\n" | |
182 | " R_32x2_1_0=15,\n" | |
183 | " R_32x2_2_0=26,\n" | |
184 | " R_32x2_3_0= 6,\n" | |
185 | " R_32x2_4_0=17,\n" | |
186 | " R_32x2_5_0=29,\n" | |
187 | " R_32x2_6_0=16,\n" | |
188 | " R_32x2_7_0=24\n" | |
189 | "};\n" | |
190 | ||
191 | "static uint RotL_32(uint x, uint N)\n" | |
192 | "{\n" | |
193 | " return (x << (N & 31)) | (x >> ((32-N) & 31));\n" | |
194 | "}\n" | |
92f5a8d4 | 195 | |
7c673cae FG |
196 | "struct r123array2x32 {\n" |
197 | " uint v[2];\n" | |
198 | "};\n" | |
199 | "typedef struct r123array2x32 threefry2x32_ctr_t;\n" | |
200 | "typedef struct r123array2x32 threefry2x32_key_t;\n" | |
201 | ||
202 | "threefry2x32_ctr_t threefry2x32_R(unsigned int Nrounds, threefry2x32_ctr_t in, threefry2x32_key_t k)\n" | |
203 | "{\n" | |
204 | " threefry2x32_ctr_t X;\n" | |
205 | " uint ks[3];\n" | |
206 | " uint i; \n" | |
207 | " ks[2] = SKEIN_KS_PARITY_32;\n" | |
208 | " for (i=0;i < 2; i++) {\n" | |
209 | " ks[i] = k.v[i];\n" | |
210 | " X.v[i] = in.v[i];\n" | |
211 | " ks[2] ^= k.v[i];\n" | |
212 | " }\n" | |
213 | " X.v[0] += ks[0]; X.v[1] += ks[1];\n" | |
214 | " if(Nrounds>0){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n" | |
215 | " if(Nrounds>1){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n" | |
216 | " if(Nrounds>2){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n" | |
217 | " if(Nrounds>3){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n" | |
218 | " if(Nrounds>3){\n" | |
219 | " X.v[0] += ks[1]; X.v[1] += ks[2];\n" | |
220 | " X.v[1] += 1;\n" | |
221 | " }\n" | |
222 | " if(Nrounds>4){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n" | |
223 | " if(Nrounds>5){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n" | |
224 | " if(Nrounds>6){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n" | |
225 | " if(Nrounds>7){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n" | |
226 | " if(Nrounds>7){\n" | |
227 | " X.v[0] += ks[2]; X.v[1] += ks[0];\n" | |
228 | " X.v[1] += 2;\n" | |
229 | " }\n" | |
230 | " if(Nrounds>8){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n" | |
231 | " if(Nrounds>9){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n" | |
232 | " if(Nrounds>10){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n" | |
233 | " if(Nrounds>11){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n" | |
234 | " if(Nrounds>11){\n" | |
235 | " X.v[0] += ks[0]; X.v[1] += ks[1];\n" | |
236 | " X.v[1] += 3;\n" | |
237 | " }\n" | |
238 | " if(Nrounds>12){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n" | |
239 | " if(Nrounds>13){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n" | |
240 | " if(Nrounds>14){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n" | |
241 | " if(Nrounds>15){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n" | |
242 | " if(Nrounds>15){\n" | |
243 | " X.v[0] += ks[1]; X.v[1] += ks[2];\n" | |
244 | " X.v[1] += 4;\n" | |
245 | " }\n" | |
246 | " if(Nrounds>16){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n" | |
247 | " if(Nrounds>17){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n" | |
248 | " if(Nrounds>18){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n" | |
249 | " if(Nrounds>19){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n" | |
250 | " if(Nrounds>19){\n" | |
251 | " X.v[0] += ks[2]; X.v[1] += ks[0];\n" | |
252 | " X.v[1] += 5;\n" | |
253 | " }\n" | |
254 | " if(Nrounds>20){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n" | |
255 | " if(Nrounds>21){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n" | |
256 | " if(Nrounds>22){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n" | |
257 | " if(Nrounds>23){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n" | |
258 | " if(Nrounds>23){\n" | |
259 | " X.v[0] += ks[0]; X.v[1] += ks[1];\n" | |
260 | " X.v[1] += 6;\n" | |
261 | " }\n" | |
262 | " if(Nrounds>24){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n" | |
263 | " if(Nrounds>25){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n" | |
264 | " if(Nrounds>26){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n" | |
265 | " if(Nrounds>27){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n" | |
266 | " if(Nrounds>27){\n" | |
267 | " X.v[0] += ks[1]; X.v[1] += ks[2];\n" | |
268 | " X.v[1] += 7;\n" | |
269 | " }\n" | |
270 | " if(Nrounds>28){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n" | |
271 | " if(Nrounds>29){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n" | |
272 | " if(Nrounds>30){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n" | |
273 | " if(Nrounds>31){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n" | |
274 | " if(Nrounds>31){\n" | |
275 | " X.v[0] += ks[2]; X.v[1] += ks[0];\n" | |
276 | " X.v[1] += 8;\n" | |
277 | " }\n" | |
278 | " return X;\n" | |
279 | "}\n" | |
92f5a8d4 TL |
280 | "__kernel void fill(__global uint * output,\n" |
281 | " const uint output_size,\n" | |
282 | " const uint2 key,\n" | |
283 | " const uint2 counter)\n" | |
284 | "{\n" | |
285 | " uint gid = get_global_id(0);\n" | |
286 | " threefry2x32_ctr_t c;\n" | |
287 | " c.v[0] = counter.x + gid;\n" | |
288 | " c.v[1] = counter.y + (c.v[0] < counter.x ? 1 : 0);\n" | |
289 | "\n" | |
290 | " threefry2x32_key_t k = { {key.x, key.y} };\n" | |
291 | "\n" | |
292 | " threefry2x32_ctr_t result;\n" | |
293 | " result = threefry2x32_R(THREEFRY2x32_DEFAULT_ROUNDS, c, k);\n" | |
294 | "\n" | |
295 | " if(gid < output_size/2)\n" | |
296 | " {\n" | |
297 | " output[2 * gid] = result.v[0];\n" | |
298 | " output[2 * gid + 1] = result.v[1];\n" | |
299 | " }\n" | |
300 | " else if(gid < (output_size+1)/2)\n" | |
301 | " output[2 * gid] = result.v[0];\n" | |
7c673cae FG |
302 | "}\n"; |
303 | ||
304 | m_program = cache->get_or_build(cache_key, std::string(), source, m_context); | |
305 | } | |
306 | ||
92f5a8d4 TL |
307 | // Engine state |
308 | ulong_ m_key; // 2 x 32bit | |
309 | ulong_ m_counter; | |
310 | // OpenCL | |
7c673cae FG |
311 | context m_context; |
312 | program m_program; | |
313 | }; | |
314 | ||
315 | } // end compute namespace | |
316 | } // end boost namespace | |
317 | ||
318 | #endif // BOOST_COMPUTE_RANDOM_THREEFRY_HPP |