]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /********************************************************************** |
2 | Copyright(c) 2011-2015 Intel Corporation All rights reserved. | |
3 | ||
4 | Redistribution and use in source and binary forms, with or without | |
5 | modification, are permitted provided that the following conditions | |
6 | are met: | |
7 | * Redistributions of source code must retain the above copyright | |
8 | notice, this list of conditions and the following disclaimer. | |
9 | * Redistributions in binary form must reproduce the above copyright | |
10 | notice, this list of conditions and the following disclaimer in | |
11 | the documentation and/or other materials provided with the | |
12 | distribution. | |
13 | * Neither the name of Intel Corporation nor the names of its | |
14 | contributors may be used to endorse or promote products derived | |
15 | from this software without specific prior written permission. | |
16 | ||
17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
28 | **********************************************************************/ | |
29 | ||
30 | ||
31 | #ifndef _ERASURE_CODE_H_ | |
32 | #define _ERASURE_CODE_H_ | |
33 | ||
34 | /** | |
35 | * @file erasure_code.h | |
36 | * @brief Interface to functions supporting erasure code encode and decode. | |
37 | * | |
38 | * This file defines the interface to optimized functions used in erasure | |
39 | * codes. Encode and decode of erasures in GF(2^8) are made by calculating the | |
40 | * dot product of the symbols (bytes in GF(2^8)) across a set of buffers and a | |
41 | * set of coefficients. Values for the coefficients are determined by the type | |
42 | * of erasure code. Using a general dot product means that any sequence of | |
43 | * coefficients may be used including erasure codes based on random | |
44 | * coefficients. | |
45 | * Multiple versions of dot product are supplied to calculate 1-6 output | |
46 | * vectors in one pass. | |
47 | * Base GF multiply and divide functions can be sped up by defining | |
48 | * GF_LARGE_TABLES at the expense of memory size. | |
49 | * | |
50 | */ | |
51 | ||
52 | #include "gf_vect_mul.h" | |
53 | ||
54 | #ifdef __cplusplus | |
55 | extern "C" { | |
56 | #endif | |
57 | ||
58 | /** | |
59 | * @brief Initialize tables for fast Erasure Code encode and decode. | |
60 | * | |
61 | * Generates the expanded tables needed for fast encode or decode for erasure | |
62 | * codes on blocks of data. 32 bytes are generated for each input coefficient. | |
63 | * | |
64 | * @param k The number of vector sources or rows in the generator matrix | |
65 | * for coding. | |
66 | * @param rows The number of output vectors to concurrently encode/decode. | |
67 | * @param a Pointer to sets of arrays of input coefficients used to encode | |
68 | * or decode data. | |
69 | * @param gftbls Pointer to start of space for concatenated output tables | |
70 | * generated from input coefficients. Must be of size 32*k*rows. | |
71 | * @returns none | |
72 | */ | |
73 | ||
74 | void ec_init_tables(int k, int rows, unsigned char* a, unsigned char* gftbls); | |
75 | ||
76 | /** | |
77 | * @brief Generate or decode erasure codes on blocks of data, runs appropriate version. | |
78 | * | |
79 | * Given a list of source data blocks, generate one or multiple blocks of | |
80 | * encoded data as specified by a matrix of GF(2^8) coefficients. When given a | |
81 | * suitable set of coefficients, this function will perform the fast generation | |
82 | * or decoding of Reed-Solomon type erasure codes. | |
83 | * | |
84 | * This function determines what instruction sets are enabled and | |
85 | * selects the appropriate version at runtime. | |
86 | * | |
87 | * @param len Length of each block of data (vector) of source or dest data. | |
88 | * @param k The number of vector sources or rows in the generator matrix | |
89 | * for coding. | |
90 | * @param rows The number of output vectors to concurrently encode/decode. | |
91 | * @param gftbls Pointer to array of input tables generated from coding | |
92 | * coefficients in ec_init_tables(). Must be of size 32*k*rows | |
93 | * @param data Array of pointers to source input buffers. | |
94 | * @param coding Array of pointers to coded output buffers. | |
95 | * @returns none | |
96 | */ | |
97 | ||
98 | void ec_encode_data(int len, int k, int rows, unsigned char *gftbls, unsigned char **data, | |
99 | unsigned char **coding); | |
100 | ||
101 | /** | |
102 | * @brief Generate or decode erasure codes on blocks of data. | |
103 | * | |
104 | * Arch specific version of ec_encode_data() with same parameters. | |
105 | * @requires SSE4.1 | |
106 | */ | |
107 | void ec_encode_data_sse(int len, int k, int rows, unsigned char *gftbls, unsigned char **data, | |
108 | unsigned char **coding); | |
109 | ||
110 | /** | |
111 | * @brief Generate or decode erasure codes on blocks of data. | |
112 | * | |
113 | * Arch specific version of ec_encode_data() with same parameters. | |
114 | * @requires AVX | |
115 | */ | |
116 | void ec_encode_data_avx(int len, int k, int rows, unsigned char *gftbls, unsigned char **data, | |
117 | unsigned char **coding); | |
118 | ||
119 | /** | |
120 | * @brief Generate or decode erasure codes on blocks of data. | |
121 | * | |
122 | * Arch specific version of ec_encode_data() with same parameters. | |
123 | * @requires AVX2 | |
124 | */ | |
125 | void ec_encode_data_avx2(int len, int k, int rows, unsigned char *gftbls, unsigned char **data, | |
126 | unsigned char **coding); | |
127 | ||
128 | /** | |
129 | * @brief Generate or decode erasure codes on blocks of data, runs baseline version. | |
130 | * | |
131 | * Baseline version of ec_encode_data(); parameters match by position (srcs = k, dests = rows, v = gftbls, src = data, dest = coding). | |
132 | */ | |
133 | void ec_encode_data_base(int len, int srcs, int dests, unsigned char *v, unsigned char **src, | |
134 | unsigned char **dest); | |
135 | ||
136 | /** | |
137 | * @brief Generate update for encode or decode of erasure codes from single source, runs appropriate version. | |
138 | * | |
139 | * Given one source data block, update one or multiple blocks of encoded data as | |
140 | * specified by a matrix of GF(2^8) coefficients. When given a suitable set of | |
141 | * coefficients, this function will perform the fast generation or decoding of | |
142 | * Reed-Solomon type erasure codes from one input source at a time. | |
143 | * | |
144 | * This function determines what instruction sets are enabled and selects the | |
145 | * appropriate version at runtime. | |
146 | * | |
147 | * @param len Length of each block of data (vector) of source or dest data. | |
148 | * @param k The number of vector sources or rows in the generator matrix | |
149 | * for coding. | |
150 | * @param rows The number of output vectors to concurrently encode/decode. | |
151 | * @param vec_i The vector index corresponding to the single input source. | |
152 | * @param g_tbls Pointer to array of input tables generated from coding | |
153 | * coefficients in ec_init_tables(). Must be of size 32*k*rows | |
154 | * @param data Pointer to single input source used to update output parity. | |
155 | * @param coding Array of pointers to coded output buffers. | |
156 | * @returns none | |
157 | */ | |
158 | void ec_encode_data_update(int len, int k, int rows, int vec_i, unsigned char *g_tbls, | |
159 | unsigned char *data, unsigned char **coding); | |
160 | ||
161 | /** | |
162 | * @brief Generate update for encode or decode of erasure codes from single source. | |
163 | * | |
164 | * Arch specific version of ec_encode_data_update() with same parameters. | |
165 | * @requires SSE4.1 | |
166 | */ | |
167 | ||
168 | void ec_encode_data_update_sse(int len, int k, int rows, int vec_i, unsigned char *g_tbls, | |
169 | unsigned char *data, unsigned char **coding); | |
170 | ||
171 | /** | |
172 | * @brief Generate update for encode or decode of erasure codes from single source. | |
173 | * | |
174 | * Arch specific version of ec_encode_data_update() with same parameters. | |
175 | * @requires AVX | |
176 | */ | |
177 | ||
178 | void ec_encode_data_update_avx(int len, int k, int rows, int vec_i, unsigned char *g_tbls, | |
179 | unsigned char *data, unsigned char **coding); | |
180 | ||
181 | /** | |
182 | * @brief Generate update for encode or decode of erasure codes from single source. | |
183 | * | |
184 | * Arch specific version of ec_encode_data_update() with same parameters. | |
185 | * @requires AVX2 | |
186 | */ | |
187 | ||
188 | void ec_encode_data_update_avx2(int len, int k, int rows, int vec_i, unsigned char *g_tbls, | |
189 | unsigned char *data, unsigned char **coding); | |
190 | ||
191 | /** | |
192 | * @brief Generate update for encode or decode of erasure codes from single source. | |
193 | * | |
194 | * Baseline version of ec_encode_data_update(). | |
195 | */ | |
196 | ||
197 | void ec_encode_data_update_base(int len, int k, int rows, int vec_i, unsigned char *v, | |
198 | unsigned char *data, unsigned char **dest); | |
199 | ||
200 | ||
201 | /** | |
202 | * @brief GF(2^8) vector dot product. | |
203 | * | |
204 | * Does a GF(2^8) dot product across each byte of the input array and a constant | |
205 | * set of coefficients to produce each byte of the output. Can be used for | |
206 | * erasure coding encode and decode. Function requires pre-calculation of a | |
207 | * 32*vlen byte constant array based on the input coefficients. | |
208 | * @requires SSE4.1 | |
209 | * | |
210 | * @param len Length of each vector in bytes. Must be >= 16. | |
211 | * @param vlen Number of vector sources. | |
212 | * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based | |
213 | * on the array of input coefficients. | |
214 | * @param src Array of pointers to source inputs. | |
215 | * @param dest Pointer to destination data array. | |
216 | * @returns none | |
217 | */ | |
218 | ||
219 | void gf_vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, | |
220 | unsigned char **src, unsigned char *dest); | |
221 | ||
222 | /** | |
223 | * @brief GF(2^8) vector dot product. | |
224 | * | |
225 | * Does a GF(2^8) dot product across each byte of the input array and a constant | |
226 | * set of coefficients to produce each byte of the output. Can be used for | |
227 | * erasure coding encode and decode. Function requires pre-calculation of a | |
228 | * 32*vlen byte constant array based on the input coefficients. | |
229 | * @requires AVX | |
230 | * | |
231 | * @param len Length of each vector in bytes. Must be >= 16. | |
232 | * @param vlen Number of vector sources. | |
233 | * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based | |
234 | * on the array of input coefficients. | |
235 | * @param src Array of pointers to source inputs. | |
236 | * @param dest Pointer to destination data array. | |
237 | * @returns none | |
238 | */ | |
239 | ||
240 | void gf_vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, | |
241 | unsigned char **src, unsigned char *dest); | |
242 | ||
243 | /** | |
244 | * @brief GF(2^8) vector dot product. | |
245 | * | |
246 | * Does a GF(2^8) dot product across each byte of the input array and a constant | |
247 | * set of coefficients to produce each byte of the output. Can be used for | |
248 | * erasure coding encode and decode. Function requires pre-calculation of a | |
249 | * 32*vlen byte constant array based on the input coefficients. | |
250 | * @requires AVX2 | |
251 | * | |
252 | * @param len Length of each vector in bytes. Must be >= 32. | |
253 | * @param vlen Number of vector sources. | |
254 | * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based | |
255 | * on the array of input coefficients. | |
256 | * @param src Array of pointers to source inputs. | |
257 | * @param dest Pointer to destination data array. | |
258 | * @returns none | |
259 | */ | |
260 | ||
261 | void gf_vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, | |
262 | unsigned char **src, unsigned char *dest); | |
263 | ||
264 | /** | |
265 | * @brief GF(2^8) vector dot product with two outputs. | |
266 | * | |
267 | * Vector dot product optimized to calculate two outputs at a time. Does two | |
268 | * GF(2^8) dot products across each byte of the input array and two constant | |
269 | * sets of coefficients to produce each byte of the outputs. Can be used for | |
270 | * erasure coding encode and decode. Function requires pre-calculation of a | |
271 | * 2*32*vlen byte constant array based on the two sets of input coefficients. | |
272 | * @requires SSE4.1 | |
273 | * | |
274 | * @param len Length of each vector in bytes. Must be >= 16. | |
275 | * @param vlen Number of vector sources. | |
276 | * @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants | |
277 | * based on the array of input coefficients. | |
278 | * @param src Array of pointers to source inputs. | |
279 | * @param dest Array of pointers to destination data buffers. | |
280 | * @returns none | |
281 | */ | |
282 | ||
283 | void gf_2vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, | |
284 | unsigned char **src, unsigned char **dest); | |
285 | ||
286 | /** | |
287 | * @brief GF(2^8) vector dot product with two outputs. | |
288 | * | |
289 | * Vector dot product optimized to calculate two outputs at a time. Does two | |
290 | * GF(2^8) dot products across each byte of the input array and two constant | |
291 | * sets of coefficients to produce each byte of the outputs. Can be used for | |
292 | * erasure coding encode and decode. Function requires pre-calculation of a | |
293 | * 2*32*vlen byte constant array based on the two sets of input coefficients. | |
294 | * @requires AVX | |
295 | * | |
296 | * @param len Length of each vector in bytes. Must be >= 16. | |
297 | * @param vlen Number of vector sources. | |
298 | * @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants | |
299 | * based on the array of input coefficients. | |
300 | * @param src Array of pointers to source inputs. | |
301 | * @param dest Array of pointers to destination data buffers. | |
302 | * @returns none | |
303 | */ | |
304 | ||
305 | void gf_2vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, | |
306 | unsigned char **src, unsigned char **dest); | |
307 | ||
308 | /** | |
309 | * @brief GF(2^8) vector dot product with two outputs. | |
310 | * | |
311 | * Vector dot product optimized to calculate two outputs at a time. Does two | |
312 | * GF(2^8) dot products across each byte of the input array and two constant | |
313 | * sets of coefficients to produce each byte of the outputs. Can be used for | |
314 | * erasure coding encode and decode. Function requires pre-calculation of a | |
315 | * 2*32*vlen byte constant array based on the two sets of input coefficients. | |
316 | * @requires AVX2 | |
317 | * | |
318 | * @param len Length of each vector in bytes. Must be >= 32. | |
319 | * @param vlen Number of vector sources. | |
320 | * @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants | |
321 | * based on the array of input coefficients. | |
322 | * @param src Array of pointers to source inputs. | |
323 | * @param dest Array of pointers to destination data buffers. | |
324 | * @returns none | |
325 | */ | |
326 | ||
327 | void gf_2vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, | |
328 | unsigned char **src, unsigned char **dest); | |
329 | ||
330 | /** | |
331 | * @brief GF(2^8) vector dot product with three outputs. | |
332 | * | |
333 | * Vector dot product optimized to calculate three outputs at a time. Does three | |
334 | * GF(2^8) dot products across each byte of the input array and three constant | |
335 | * sets of coefficients to produce each byte of the outputs. Can be used for | |
336 | * erasure coding encode and decode. Function requires pre-calculation of a | |
337 | * 3*32*vlen byte constant array based on the three sets of input coefficients. | |
338 | * @requires SSE4.1 | |
339 | * | |
340 | * @param len Length of each vector in bytes. Must be >= 16. | |
341 | * @param vlen Number of vector sources. | |
342 | * @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants | |
343 | * based on the array of input coefficients. | |
344 | * @param src Array of pointers to source inputs. | |
345 | * @param dest Array of pointers to destination data buffers. | |
346 | * @returns none | |
347 | */ | |
348 | ||
349 | void gf_3vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, | |
350 | unsigned char **src, unsigned char **dest); | |
351 | ||
352 | /** | |
353 | * @brief GF(2^8) vector dot product with three outputs. | |
354 | * | |
355 | * Vector dot product optimized to calculate three outputs at a time. Does three | |
356 | * GF(2^8) dot products across each byte of the input array and three constant | |
357 | * sets of coefficients to produce each byte of the outputs. Can be used for | |
358 | * erasure coding encode and decode. Function requires pre-calculation of a | |
359 | * 3*32*vlen byte constant array based on the three sets of input coefficients. | |
360 | * @requires AVX | |
361 | * | |
362 | * @param len Length of each vector in bytes. Must be >= 16. | |
363 | * @param vlen Number of vector sources. | |
364 | * @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants | |
365 | * based on the array of input coefficients. | |
366 | * @param src Array of pointers to source inputs. | |
367 | * @param dest Array of pointers to destination data buffers. | |
368 | * @returns none | |
369 | */ | |
370 | ||
371 | void gf_3vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, | |
372 | unsigned char **src, unsigned char **dest); | |
373 | ||
374 | /** | |
375 | * @brief GF(2^8) vector dot product with three outputs. | |
376 | * | |
377 | * Vector dot product optimized to calculate three outputs at a time. Does three | |
378 | * GF(2^8) dot products across each byte of the input array and three constant | |
379 | * sets of coefficients to produce each byte of the outputs. Can be used for | |
380 | * erasure coding encode and decode. Function requires pre-calculation of a | |
381 | * 3*32*vlen byte constant array based on the three sets of input coefficients. | |
382 | * @requires AVX2 | |
383 | * | |
384 | * @param len Length of each vector in bytes. Must be >= 32. | |
385 | * @param vlen Number of vector sources. | |
386 | * @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants | |
387 | * based on the array of input coefficients. | |
388 | * @param src Array of pointers to source inputs. | |
389 | * @param dest Array of pointers to destination data buffers. | |
390 | * @returns none | |
391 | */ | |
392 | ||
393 | void gf_3vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, | |
394 | unsigned char **src, unsigned char **dest); | |
395 | ||
396 | /** | |
397 | * @brief GF(2^8) vector dot product with four outputs. | |
398 | * | |
399 | * Vector dot product optimized to calculate four outputs at a time. Does four | |
400 | * GF(2^8) dot products across each byte of the input array and four constant | |
401 | * sets of coefficients to produce each byte of the outputs. Can be used for | |
402 | * erasure coding encode and decode. Function requires pre-calculation of a | |
403 | * 4*32*vlen byte constant array based on the four sets of input coefficients. | |
404 | * @requires SSE4.1 | |
405 | * | |
406 | * @param len Length of each vector in bytes. Must be >= 16. | |
407 | * @param vlen Number of vector sources. | |
408 | * @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants | |
409 | * based on the array of input coefficients. | |
410 | * @param src Array of pointers to source inputs. | |
411 | * @param dest Array of pointers to destination data buffers. | |
412 | * @returns none | |
413 | */ | |
414 | ||
415 | void gf_4vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, | |
416 | unsigned char **src, unsigned char **dest); | |
417 | ||
418 | /** | |
419 | * @brief GF(2^8) vector dot product with four outputs. | |
420 | * | |
421 | * Vector dot product optimized to calculate four outputs at a time. Does four | |
422 | * GF(2^8) dot products across each byte of the input array and four constant | |
423 | * sets of coefficients to produce each byte of the outputs. Can be used for | |
424 | * erasure coding encode and decode. Function requires pre-calculation of a | |
425 | * 4*32*vlen byte constant array based on the four sets of input coefficients. | |
426 | * @requires AVX | |
427 | * | |
428 | * @param len Length of each vector in bytes. Must be >= 16. | |
429 | * @param vlen Number of vector sources. | |
430 | * @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants | |
431 | * based on the array of input coefficients. | |
432 | * @param src Array of pointers to source inputs. | |
433 | * @param dest Array of pointers to destination data buffers. | |
434 | * @returns none | |
435 | */ | |
436 | ||
437 | void gf_4vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, | |
438 | unsigned char **src, unsigned char **dest); | |
439 | ||
440 | /** | |
441 | * @brief GF(2^8) vector dot product with four outputs. | |
442 | * | |
443 | * Vector dot product optimized to calculate four outputs at a time. Does four | |
444 | * GF(2^8) dot products across each byte of the input array and four constant | |
445 | * sets of coefficients to produce each byte of the outputs. Can be used for | |
446 | * erasure coding encode and decode. Function requires pre-calculation of a | |
447 | * 4*32*vlen byte constant array based on the four sets of input coefficients. | |
448 | * @requires AVX2 | |
449 | * | |
450 | * @param len Length of each vector in bytes. Must be >= 32. | |
451 | * @param vlen Number of vector sources. | |
452 | * @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants | |
453 | * based on the array of input coefficients. | |
454 | * @param src Array of pointers to source inputs. | |
455 | * @param dest Array of pointers to destination data buffers. | |
456 | * @returns none | |
457 | */ | |
458 | ||
459 | void gf_4vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, | |
460 | unsigned char **src, unsigned char **dest); | |
461 | ||
462 | /** | |
463 | * @brief GF(2^8) vector dot product with five outputs. | |
464 | * | |
465 | * Vector dot product optimized to calculate five outputs at a time. Does five | |
466 | * GF(2^8) dot products across each byte of the input array and five constant | |
467 | * sets of coefficients to produce each byte of the outputs. Can be used for | |
468 | * erasure coding encode and decode. Function requires pre-calculation of a | |
469 | * 5*32*vlen byte constant array based on the five sets of input coefficients. | |
470 | * @requires SSE4.1 | |
471 | * | |
472 | * @param len Length of each vector in bytes. Must be >= 16. | |
473 | * @param vlen Number of vector sources. | |
474 | * @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants | |
475 | * based on the array of input coefficients. | |
476 | * @param src Array of pointers to source inputs. | |
477 | * @param dest Array of pointers to destination data buffers. | |
478 | * @returns none | |
479 | */ | |
480 | ||
481 | void gf_5vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, | |
482 | unsigned char **src, unsigned char **dest); | |
483 | ||
484 | /** | |
485 | * @brief GF(2^8) vector dot product with five outputs. | |
486 | * | |
487 | * Vector dot product optimized to calculate five outputs at a time. Does five | |
488 | * GF(2^8) dot products across each byte of the input array and five constant | |
489 | * sets of coefficients to produce each byte of the outputs. Can be used for | |
490 | * erasure coding encode and decode. Function requires pre-calculation of a | |
491 | * 5*32*vlen byte constant array based on the five sets of input coefficients. | |
492 | * @requires AVX | |
493 | * | |
494 | * @param len Length of each vector in bytes. Must be >= 16. | |
495 | * @param vlen Number of vector sources. | |
496 | * @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants | |
497 | * based on the array of input coefficients. | |
498 | * @param src Array of pointers to source inputs. | |
499 | * @param dest Array of pointers to destination data buffers. | |
500 | * @returns none | |
501 | */ | |
502 | ||
503 | void gf_5vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, | |
504 | unsigned char **src, unsigned char **dest); | |
505 | ||
506 | /** | |
507 | * @brief GF(2^8) vector dot product with five outputs. | |
508 | * | |
509 | * Vector dot product optimized to calculate five outputs at a time. Does five | |
510 | * GF(2^8) dot products across each byte of the input array and five constant | |
511 | * sets of coefficients to produce each byte of the outputs. Can be used for | |
512 | * erasure coding encode and decode. Function requires pre-calculation of a | |
513 | * 5*32*vlen byte constant array based on the five sets of input coefficients. | |
514 | * @requires AVX2 | |
515 | * | |
516 | * @param len Length of each vector in bytes. Must be >= 32. | |
517 | * @param vlen Number of vector sources. | |
518 | * @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants | |
519 | * based on the array of input coefficients. | |
520 | * @param src Array of pointers to source inputs. | |
521 | * @param dest Array of pointers to destination data buffers. | |
522 | * @returns none | |
523 | */ | |
524 | ||
525 | void gf_5vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, | |
526 | unsigned char **src, unsigned char **dest); | |
527 | ||
528 | /** | |
529 | * @brief GF(2^8) vector dot product with six outputs. | |
530 | * | |
531 | * Vector dot product optimized to calculate six outputs at a time. Does six | |
532 | * GF(2^8) dot products across each byte of the input array and six constant | |
533 | * sets of coefficients to produce each byte of the outputs. Can be used for | |
534 | * erasure coding encode and decode. Function requires pre-calculation of a | |
535 | * 6*32*vlen byte constant array based on the six sets of input coefficients. | |
536 | * @requires SSE4.1 | |
537 | * | |
538 | * @param len Length of each vector in bytes. Must be >= 16. | |
539 | * @param vlen Number of vector sources. | |
540 | * @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants | |
541 | * based on the array of input coefficients. | |
542 | * @param src Array of pointers to source inputs. | |
543 | * @param dest Array of pointers to destination data buffers. | |
544 | * @returns none | |
545 | */ | |
546 | ||
547 | void gf_6vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, | |
548 | unsigned char **src, unsigned char **dest); | |
549 | ||
550 | /** | |
551 | * @brief GF(2^8) vector dot product with six outputs. | |
552 | * | |
553 | * Vector dot product optimized to calculate six outputs at a time. Does six | |
554 | * GF(2^8) dot products across each byte of the input array and six constant | |
555 | * sets of coefficients to produce each byte of the outputs. Can be used for | |
556 | * erasure coding encode and decode. Function requires pre-calculation of a | |
557 | * 6*32*vlen byte constant array based on the six sets of input coefficients. | |
558 | * @requires AVX | |
559 | * | |
560 | * @param len Length of each vector in bytes. Must be >= 16. | |
561 | * @param vlen Number of vector sources. | |
562 | * @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants | |
563 | * based on the array of input coefficients. | |
564 | * @param src Array of pointers to source inputs. | |
565 | * @param dest Array of pointers to destination data buffers. | |
566 | * @returns none | |
567 | */ | |
568 | ||
569 | void gf_6vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, | |
570 | unsigned char **src, unsigned char **dest); | |
571 | ||
572 | /** | |
573 | * @brief GF(2^8) vector dot product with six outputs. | |
574 | * | |
575 | * Vector dot product optimized to calculate six outputs at a time. Does six | |
576 | * GF(2^8) dot products across each byte of the input array and six constant | |
577 | * sets of coefficients to produce each byte of the outputs. Can be used for | |
578 | * erasure coding encode and decode. Function requires pre-calculation of a | |
579 | * 6*32*vlen byte constant array based on the six sets of input coefficients. | |
580 | * @requires AVX2 | |
581 | * | |
582 | * @param len Length of each vector in bytes. Must be >= 32. | |
583 | * @param vlen Number of vector sources. | |
584 | * @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants | |
585 | * based on the array of input coefficients. | |
586 | * @param src Array of pointers to source inputs. | |
587 | * @param dest Array of pointers to destination data buffers. | |
588 | * @returns none | |
589 | */ | |
590 | ||
591 | void gf_6vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, | |
592 | unsigned char **src, unsigned char **dest); | |
593 | ||
594 | /** | |
595 | * @brief GF(2^8) vector dot product, runs baseline version. | |
596 | * | |
597 | * Does a GF(2^8) dot product across each byte of the input array and a constant | |
598 | * set of coefficients to produce each byte of the output. Can be used for | |
599 | * erasure coding encode and decode. Function requires pre-calculation of a | |
600 | * 32*vlen byte constant array based on the input coefficients. | |
601 | * | |
602 | * @param len Length of each vector in bytes. Must be >= 16. | |
603 | * @param vlen Number of vector sources. | |
604 | * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based | |
605 | * on the array of input coefficients. Only elements 32*CONST*j + 1 | |
606 | * of this array are used, where j = (0, 1, 2...) and CONST is the | |
607 | * number of elements in the array of input coefficients. The | |
608 | * elements used correspond to the original input coefficients. | |
609 | * @param src Array of pointers to source inputs. | |
610 | * @param dest Pointer to destination data array. | |
611 | * @returns none | |
612 | */ | |
613 | ||
614 | void gf_vect_dot_prod_base(int len, int vlen, unsigned char *gftbls, | |
615 | unsigned char **src, unsigned char *dest); | |
616 | ||
617 | /** | |
618 | * @brief GF(2^8) vector dot product, runs appropriate version. | |
619 | * | |
620 | * Does a GF(2^8) dot product across each byte of the input array and a constant | |
621 | * set of coefficients to produce each byte of the output. Can be used for | |
622 | * erasure coding encode and decode. Function requires pre-calculation of a | |
623 | * 32*vlen byte constant array based on the input coefficients. | |
624 | * | |
625 | * This function determines what instruction sets are enabled and | |
626 | * selects the appropriate version at runtime. | |
627 | * | |
628 | * @param len Length of each vector in bytes. Must be >= 32. | |
629 | * @param vlen Number of vector sources. | |
630 | * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based | |
631 | * on the array of input coefficients. | |
632 | * @param src Array of pointers to source inputs. | |
633 | * @param dest Pointer to destination data array. | |
634 | * @returns none | |
635 | */ | |
636 | ||
637 | void gf_vect_dot_prod(int len, int vlen, unsigned char *gftbls, | |
638 | unsigned char **src, unsigned char *dest); | |
639 | ||
640 | ||
641 | /** | |
642 | * @brief GF(2^8) vector multiply accumulate, runs appropriate version. | |
643 | * | |
644 | * Does a GF(2^8) multiply across each byte of input source with expanded | |
645 | * constant and add to destination array. Can be used for erasure coding encode | |
646 | * and decode update when only one source is available at a time. Function | |
647 | * requires pre-calculation of a 32*vec byte constant array based on the input | |
648 | * coefficients. | |
649 | * | |
650 | * This function determines what instruction sets are enabled and selects the | |
651 | * appropriate version at runtime. | |
652 | * | |
653 | * @param len Length of each vector in bytes. Must be >= 32. | |
654 | * @param vec The number of vector sources or rows in the generator matrix | |
655 | * for coding. | |
656 | * @param vec_i The vector index corresponding to the single input source. | |
657 | * @param gftbls Pointer to array of input tables generated from coding | |
658 | * coefficients in ec_init_tables(). Must be of size 32*vec. | |
659 | * @param src Pointer to source input array. | |
660 | * @param dest Pointer to destination data array. | |
661 | * @returns none | |
662 | */ | |
663 | ||
664 | void gf_vect_mad(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, | |
665 | unsigned char *dest); | |
666 | ||
667 | /** | |
668 | * @brief GF(2^8) vector multiply accumulate, arch specific version. | |
669 | * | |
670 | * Arch specific version of gf_vect_mad() with same parameters. | |
671 | * @requires SSE4.1 | |
672 | */ | |
673 | ||
674 | void gf_vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, | |
675 | unsigned char *dest); | |
676 | /** | |
677 | * @brief GF(2^8) vector multiply accumulate, arch specific version. | |
678 | * | |
679 | * Arch specific version of gf_vect_mad() with same parameters. | |
680 | * @requires AVX | |
681 | */ | |
682 | ||
683 | void gf_vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, | |
684 | unsigned char *dest); | |
685 | ||
686 | /** | |
687 | * @brief GF(2^8) vector multiply accumulate, arch specific version. | |
688 | * | |
689 | * Arch specific version of gf_vect_mad() with same parameters. | |
690 | * @requires AVX2 | |
691 | */ | |
692 | ||
693 | void gf_vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, | |
694 | unsigned char *dest); | |
695 | ||
696 | /** | |
697 | * @brief GF(2^8) vector multiply accumulate, baseline version. | |
698 | * | |
699 | * Baseline version of gf_vect_mad() with same parameters. | |
700 | */ | |
701 | ||
702 | void gf_vect_mad_base(int len, int vec, int vec_i, unsigned char *v, unsigned char *src, | |
703 | unsigned char *dest); | |
704 | ||
705 | /** | |
706 | * @brief GF(2^8) vector multiply with 2 accumulate. SSE version. | |
707 | * | |
708 | * Does a GF(2^8) multiply across each byte of input source with expanded | |
709 | * constants and add to destination arrays. Can be used for erasure coding | |
710 | * encode and decode update when only one source is available at a | |
711 | * time. Function requires pre-calculation of a 32*vec byte constant array based | |
712 | * on the input coefficients. | |
713 | * @requires SSE4.1 | |
714 | * | |
715 | * @param len Length of each vector in bytes. Must be >= 32. | |
716 | * @param vec The number of vector sources or rows in the generator matrix | |
717 | * for coding. | |
718 | * @param vec_i The vector index corresponding to the single input source. | |
719 | * @param gftbls Pointer to array of input tables generated from coding | |
720 | * coefficients in ec_init_tables(). Must be of size 32*vec. | |
721 | * @param src Pointer to source input array. | |
722 | * @param dest Array of pointers to destination input/outputs. | |
723 | * @returns none | |
724 | */ | |
725 | ||
726 | void gf_2vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, | |
727 | unsigned char **dest); | |
728 | ||
729 | /** | |
730 | * @brief GF(2^8) vector multiply with 2 accumulate. AVX version of gf_2vect_mad_sse(). | |
731 | * @requires AVX | |
732 | */ | |
733 | void gf_2vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, | |
734 | unsigned char **dest); | |
735 | /** | |
736 | * @brief GF(2^8) vector multiply with 2 accumulate. AVX2 version of gf_2vect_mad_sse(). | |
737 | * @requires AVX2 | |
738 | */ | |
739 | void gf_2vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, | |
740 | unsigned char **dest); | |
741 | ||
742 | /** | |
743 | * @brief GF(2^8) vector multiply with 3 accumulate. SSE version. | |
744 | * | |
745 | * Does a GF(2^8) multiply across each byte of input source with expanded | |
746 | * constants and add to destination arrays. Can be used for erasure coding | |
747 | * encode and decode update when only one source is available at a | |
748 | * time. Function requires pre-calculation of a 32*vec byte constant array based | |
749 | * on the input coefficients. | |
750 | * @requires SSE4.1 | |
751 | * | |
752 | * @param len Length of each vector in bytes. Must be >= 32. | |
753 | * @param vec The number of vector sources or rows in the generator matrix | |
754 | * for coding. | |
755 | * @param vec_i The vector index corresponding to the single input source. | |
756 | * @param gftbls Pointer to array of input tables generated from coding | |
757 | * coefficients in ec_init_tables(). Must be of size 32*vec. | |
758 | * @param src Pointer to source input array. | |
759 | * @param dest Array of pointers to destination input/outputs. | |
760 | * @returns none | |
761 | */ | |
762 | ||
763 | void gf_3vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, | |
764 | unsigned char **dest); | |
765 | ||
766 | /** | |
767 | * @brief GF(2^8) vector multiply with 3 accumulate. AVX version of gf_3vect_mad_sse(). | |
768 | * @requires AVX | |
769 | */ | |
770 | void gf_3vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, | |
771 | unsigned char **dest); | |
772 | ||
773 | /** | |
774 | * @brief GF(2^8) vector multiply with 3 accumulate. AVX2 version of gf_3vect_mad_sse(). | |
775 | * @requires AVX2 | |
776 | */ | |
777 | void gf_3vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, | |
778 | unsigned char **dest); | |
779 | ||
780 | /** | |
781 | * @brief GF(2^8) vector multiply with 4 accumulate. SSE version. | |
782 | * | |
783 | * Does a GF(2^8) multiply across each byte of input source with expanded | |
784 | * constants and add to destination arrays. Can be used for erasure coding | |
785 | * encode and decode update when only one source is available at a | |
786 | * time. Function requires pre-calculation of a 32*vec byte constant array based | |
787 | * on the input coefficients. | |
788 | * @requires SSE4.1 | |
789 | * | |
790 | * @param len Length of each vector in bytes. Must be >= 32. | |
791 | * @param vec The number of vector sources or rows in the generator matrix | |
792 | * for coding. | |
793 | * @param vec_i The vector index corresponding to the single input source. | |
794 | * @param gftbls Pointer to array of input tables generated from coding | |
795 | * coefficients in ec_init_tables(). Must be of size 32*vec. | |
796 | * @param src Pointer to source input array. | |
797 | * @param dest Array of pointers to destination input/outputs. | |
798 | * @returns none | |
799 | */ | |
800 | ||
801 | void gf_4vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, | |
802 | unsigned char **dest); | |
803 | ||
804 | /** | |
805 | * @brief GF(2^8) vector multiply with 4 accumulate. AVX version of gf_4vect_mad_sse(). | |
806 | * @requires AVX | |
807 | */ | |
808 | void gf_4vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, | |
809 | unsigned char **dest); | |
810 | /** | |
811 | * @brief GF(2^8) vector multiply with 4 accumulate. AVX2 version of gf_4vect_mad_sse(). | |
812 | * @requires AVX2 | |
813 | */ | |
814 | void gf_4vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, | |
815 | unsigned char **dest); | |
816 | ||
817 | /** | |
818 | * @brief GF(2^8) vector multiply with 5 accumulate. SSE version. | |
819 | * @requires SSE4.1 | |
820 | */ | |
821 | void gf_5vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, | |
822 | unsigned char **dest); | |
823 | ||
824 | /** | |
825 | * @brief GF(2^8) vector multiply with 5 accumulate. AVX version. | |
826 | * @requires AVX | |
827 | */ | |
828 | void gf_5vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, | |
829 | unsigned char **dest); | |
830 | /** | |
831 | * @brief GF(2^8) vector multiply with 5 accumulate. AVX2 version. | |
832 | * @requires AVX2 | |
833 | */ | |
834 | void gf_5vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, | |
835 | unsigned char **dest); | |
836 | ||
837 | /** | |
838 | * @brief GF(2^8) vector multiply with 6 accumulate. SSE version. | |
839 | * @requires SSE4.1 | |
840 | */ | |
841 | void gf_6vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, | |
842 | unsigned char **dest); | |
843 | /** | |
844 | * @brief GF(2^8) vector multiply with 6 accumulate. AVX version. | |
845 | * @requires AVX | |
846 | */ | |
847 | void gf_6vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, | |
848 | unsigned char **dest); | |
849 | ||
850 | /** | |
851 | * @brief GF(2^8) vector multiply with 6 accumulate. AVX2 version. | |
852 | * @requires AVX2 | |
853 | */ | |
854 | void gf_6vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, | |
855 | unsigned char **dest); | |
856 | ||
857 | ||
858 | /********************************************************************** | |
859 | * The remaining declarations are library support functions used in GF(2^8) operations. | |
860 | */ | |
861 | ||
862 | /** | |
863 | * @brief Single element GF(2^8) multiply. | |
864 | * | |
865 | * @param a Multiplicand a | |
866 | * @param b Multiplicand b | |
867 | * @returns Product of a and b in GF(2^8) | |
868 | */ | |
869 | ||
870 | unsigned char gf_mul(unsigned char a, unsigned char b); | |
871 | ||
872 | /** | |
873 | * @brief Single element GF(2^8) inverse. | |
874 | * | |
875 | * @param a Input element | |
876 | * @returns Field element b such that a x b = {1} | |
877 | */ | |
878 | ||
879 | unsigned char gf_inv(unsigned char a); | |
880 | ||
881 | /** | |
882 | * @brief Generate a matrix of coefficients to be used for encoding. | |
883 | * | |
884 | * Vandermonde matrix example of encoding coefficients where high portion of | |
885 | * matrix is identity matrix I and lower portion is constructed as 2^{i*(j-k+1)} | |
886 | * i:{0,k-1} j:{k,m-1}. Commonly used method for choosing coefficients in | |
887 | * erasure encoding but does not guarantee that every sub-matrix is invertible. | |
888 | * For large k it is possible to find cases where the decode matrix chosen from | |
889 | * sources and parity not in erasure is not invertible. Users may want to | |
890 | * adjust for k > 5. | |
891 | * | |
892 | * @param a [mxk] array to hold coefficients | |
893 | * @param m number of rows in matrix corresponding to srcs + parity. | |
894 | * @param k number of columns in matrix corresponding to srcs. | |
895 | * @returns none | |
896 | */ | |
897 | ||
898 | void gf_gen_rs_matrix(unsigned char *a, int m, int k); | |
899 | ||
900 | /** | |
901 | * @brief Generate a Cauchy matrix of coefficients to be used for encoding. | |
902 | * | |
903 | * Cauchy matrix example of encoding coefficients where high portion of matrix | |
904 | * is identity matrix I and lower portion is constructed as 1/(i + j) | i != j, | |
905 | * i:{0,k-1} j:{k,m-1}. Any sub-matrix of a Cauchy matrix should be invertible. | |
906 | * | |
907 | * @param a [mxk] array to hold coefficients | |
908 | * @param m number of rows in matrix corresponding to srcs + parity. | |
909 | * @param k number of columns in matrix corresponding to srcs. | |
910 | * @returns none | |
911 | */ | |
912 | ||
913 | void gf_gen_cauchy1_matrix(unsigned char *a, int m, int k); | |
914 | ||
915 | /** | |
916 | * @brief Invert a matrix in GF(2^8) | |
917 | * | |
918 | * @param in input matrix | |
919 | * @param out output matrix such that [in] x [out] = [I] - identity matrix | |
920 | * @param n size of matrix [nxn] | |
921 | * @returns 0 on success, non-zero on failure (singular input matrix) | |
922 | */ | |
923 | ||
924 | int gf_invert_matrix(unsigned char *in, unsigned char *out, const int n); | |
925 | ||
926 | ||
927 | /*************************************************************/ | |
928 | ||
929 | #ifdef __cplusplus | |
930 | } | |
931 | #endif | |
932 | ||
933 | #endif //_ERASURE_CODE_H_ |