ceph/src/spdk/isa-l/include/erasure_code.h

   1 /**********************************************************************
   2   Copyright(c) 2011-2015 Intel Corporation All rights reserved.
   3
   4   Redistribution and use in source and binary forms, with or without
   5   modification, are permitted provided that the following conditions
   6   are met:
   7     * Redistributions of source code must retain the above copyright
   8       notice, this list of conditions and the following disclaimer.
   9     * Redistributions in binary form must reproduce the above copyright
  10       notice, this list of conditions and the following disclaimer in
  11       the documentation and/or other materials provided with the
  12       distribution.
  13     * Neither the name of Intel Corporation nor the names of its
  14       contributors may be used to endorse or promote products derived
  15       from this software without specific prior written permission.
  16
  17   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  18   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  19   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  20   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  21   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  22   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  23   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  24   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  25   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  26   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  27   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  28 **********************************************************************/
  29
  30
  31 #ifndef _ERASURE_CODE_H_
  32 #define _ERASURE_CODE_H_
  33
  34 /**
  35  *  @file erasure_code.h
  36  *  @brief Interface to functions supporting erasure code encode and decode.
  37  *
  38  *  This file defines the interface to optimized functions used in erasure
  39  *  codes.  Encode and decode of erasures in GF(2^8) are made by calculating the
  40  *  dot product of the symbols (bytes in GF(2^8)) across a set of buffers and a
  41  *  set of coefficients.  Values for the coefficients are determined by the type
  42  *  of erasure code.  Using a general dot product means that any sequence of
  43  *  coefficients may be used including erasure codes based on random
  44  *  coefficients.
  45  *  Multiple versions of dot product are supplied to calculate 1-6 output
  46  *  vectors in one pass.
  47  *  Base GF multiply and divide functions can be sped up by defining
  48  *  GF_LARGE_TABLES at the expense of memory size.
  49  *
  50  */
  51
  52 #include "gf_vect_mul.h"
  53
  54 #ifdef __cplusplus
  55 extern "C" {
  56 #endif
  57
  58 /**
  59  * @brief Initialize tables for fast Erasure Code encode and decode.
  60  *
  61  * Generates the expanded tables needed for fast encode or decode for erasure
   62  * codes on blocks of data.  32 bytes are generated for each input coefficient.
  63  *
  64  * @param k      The number of vector sources or rows in the generator matrix
  65  *               for coding.
  66  * @param rows   The number of output vectors to concurrently encode/decode.
  67  * @param a      Pointer to sets of arrays of input coefficients used to encode
  68  *               or decode data.
  69  * @param gftbls Pointer to start of space for concatenated output tables
  70  *               generated from input coefficients.  Must be of size 32*k*rows.
  71  * @returns none
  72  */
  73
  74 void ec_init_tables(int k, int rows, unsigned char* a, unsigned char* gftbls);
  75
  76 /**
  77  * @brief Generate or decode erasure codes on blocks of data, runs appropriate version.
  78  *
  79  * Given a list of source data blocks, generate one or multiple blocks of
  80  * encoded data as specified by a matrix of GF(2^8) coefficients. When given a
  81  * suitable set of coefficients, this function will perform the fast generation
  82  * or decoding of Reed-Solomon type erasure codes.
  83  *
  84  * This function determines what instruction sets are enabled and
  85  * selects the appropriate version at runtime.
  86  *
  87  * @param len    Length of each block of data (vector) of source or dest data.
  88  * @param k      The number of vector sources or rows in the generator matrix
  89  *               for coding.
  90  * @param rows   The number of output vectors to concurrently encode/decode.
  91  * @param gftbls Pointer to array of input tables generated from coding
  92  *               coefficients in ec_init_tables(). Must be of size 32*k*rows
  93  * @param data   Array of pointers to source input buffers.
  94  * @param coding Array of pointers to coded output buffers.
  95  * @returns none
  96  */
  97
  98 void ec_encode_data(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
  99                     unsigned char **coding);
 100
 101 /**
 102  * @brief Generate or decode erasure codes on blocks of data, runs baseline version.
 103  *
  104  * Baseline version of ec_encode_data(); same parameters in the same order, named differently.
 105  */
 106 void ec_encode_data_base(int len, int srcs, int dests, unsigned char *v, unsigned char **src,
 107                          unsigned char **dest);
 108
 109 /**
 110  * @brief Generate update for encode or decode of erasure codes from single source, runs appropriate version.
 111  *
 112  * Given one source data block, update one or multiple blocks of encoded data as
 113  * specified by a matrix of GF(2^8) coefficients. When given a suitable set of
 114  * coefficients, this function will perform the fast generation or decoding of
 115  * Reed-Solomon type erasure codes from one input source at a time.
 116  *
 117  * This function determines what instruction sets are enabled and selects the
 118  * appropriate version at runtime.
 119  *
 120  * @param len    Length of each block of data (vector) of source or dest data.
 121  * @param k      The number of vector sources or rows in the generator matrix
 122  *               for coding.
 123  * @param rows   The number of output vectors to concurrently encode/decode.
 124  * @param vec_i  The vector index corresponding to the single input source.
 125  * @param g_tbls Pointer to array of input tables generated from coding
 126  *               coefficients in ec_init_tables(). Must be of size 32*k*rows
 127  * @param data   Pointer to single input source used to update output parity.
 128  * @param coding Array of pointers to coded output buffers.
 129  * @returns none
 130  */
 131 void ec_encode_data_update(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
 132                            unsigned char *data, unsigned char **coding);
 133
 134 /**
 135  * @brief Generate update for encode or decode of erasure codes from single source.
 136  *
 137  * Baseline version of ec_encode_data_update().
 138  */
 139
 140 void ec_encode_data_update_base(int len, int k, int rows, int vec_i, unsigned char *v,
 141                                 unsigned char *data, unsigned char **dest);
 142
 143 /**
 144  * @brief GF(2^8) vector dot product, runs baseline version.
 145  *
 146  * Does a GF(2^8) dot product across each byte of the input array and a constant
 147  * set of coefficients to produce each byte of the output. Can be used for
 148  * erasure coding encode and decode. Function requires pre-calculation of a
 149  * 32*vlen byte constant array based on the input coefficients.
 150  *
 151  * @param len    Length of each vector in bytes. Must be >= 16.
 152  * @param vlen   Number of vector sources.
 153  * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
 154  *               on the array of input coefficients. Only elements 32*CONST*j + 1
 155  *               of this array are used, where j = (0, 1, 2...) and CONST is the
 156  *               number of elements in the array of input coefficients. The
 157  *               elements used correspond to the original input coefficients.
 158  * @param src    Array of pointers to source inputs.
 159  * @param dest   Pointer to destination data array.
 160  * @returns none
 161  */
 162
 163
 164 void gf_vect_dot_prod_base(int len, int vlen, unsigned char *gftbls,
 165                         unsigned char **src, unsigned char *dest);
 166
 167 /**
 168  * @brief GF(2^8) vector dot product, runs appropriate version.
 169  *
 170  * Does a GF(2^8) dot product across each byte of the input array and a constant
 171  * set of coefficients to produce each byte of the output. Can be used for
 172  * erasure coding encode and decode. Function requires pre-calculation of a
 173  * 32*vlen byte constant array based on the input coefficients.
 174  *
 175  * This function determines what instruction sets are enabled and
 176  * selects the appropriate version at runtime.
 177  *
 178  * @param len    Length of each vector in bytes. Must be >= 32.
 179  * @param vlen   Number of vector sources.
 180  * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
 181  *               on the array of input coefficients.
 182  * @param src    Array of pointers to source inputs.
 183  * @param dest   Pointer to destination data array.
 184  * @returns none
 185  */
 186
 187 void gf_vect_dot_prod(int len, int vlen, unsigned char *gftbls,
 188                         unsigned char **src, unsigned char *dest);
 189
 190 /**
 191  * @brief GF(2^8) vector multiply accumulate, runs appropriate version.
 192  *
 193  * Does a GF(2^8) multiply across each byte of input source with expanded
 194  * constant and add to destination array. Can be used for erasure coding encode
 195  * and decode update when only one source is available at a time. Function
 196  * requires pre-calculation of a 32*vec byte constant array based on the input
 197  * coefficients.
 198  *
 199  * This function determines what instruction sets are enabled and selects the
 200  * appropriate version at runtime.
 201  *
 202  * @param len    Length of each vector in bytes. Must be >= 64.
 203  * @param vec    The number of vector sources or rows in the generator matrix
 204  *               for coding.
 205  * @param vec_i  The vector index corresponding to the single input source.
 206  * @param gftbls Pointer to array of input tables generated from coding
 207  *               coefficients in ec_init_tables(). Must be of size 32*vec.
  208  * @param src    Pointer to the single source input buffer.
 209  * @param dest   Pointer to destination data array.
 210  * @returns none
 211  */
 212
 213 void gf_vect_mad(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
 214                  unsigned char *dest);
 215
 216 /**
 217  * @brief GF(2^8) vector multiply accumulate, baseline version.
 218  *
 219  * Baseline version of gf_vect_mad() with same parameters.
 220  */
 221
 222 void gf_vect_mad_base(int len, int vec, int vec_i, unsigned char *v, unsigned char *src,
 223                       unsigned char *dest);
 224
 225 // x86 only
 226 #if defined(__i386__) || defined(__x86_64__)
 227
 228 /**
 229  * @brief Generate or decode erasure codes on blocks of data.
 230  *
 231  * Arch specific version of ec_encode_data() with same parameters.
 232  * @requires SSE4.1
 233  */
 234 void ec_encode_data_sse(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
 235                         unsigned char **coding);
 236
 237 /**
 238  * @brief Generate or decode erasure codes on blocks of data.
 239  *
 240  * Arch specific version of ec_encode_data() with same parameters.
 241  * @requires AVX
 242  */
 243 void ec_encode_data_avx(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
 244                         unsigned char **coding);
 245
 246 /**
 247  * @brief Generate or decode erasure codes on blocks of data.
 248  *
 249  * Arch specific version of ec_encode_data() with same parameters.
 250  * @requires AVX2
 251  */
 252 void ec_encode_data_avx2(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
 253                          unsigned char **coding);
 254
 255 /**
 256  * @brief Generate update for encode or decode of erasure codes from single source.
 257  *
 258  * Arch specific version of ec_encode_data_update() with same parameters.
 259  * @requires SSE4.1
 260  */
 261
 262 void ec_encode_data_update_sse(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
 263                                unsigned char *data, unsigned char **coding);
 264
 265 /**
 266  * @brief Generate update for encode or decode of erasure codes from single source.
 267  *
 268  * Arch specific version of ec_encode_data_update() with same parameters.
 269  * @requires AVX
 270  */
 271
 272 void ec_encode_data_update_avx(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
 273                                unsigned char *data, unsigned char **coding);
 274
 275 /**
 276  * @brief Generate update for encode or decode of erasure codes from single source.
 277  *
 278  * Arch specific version of ec_encode_data_update() with same parameters.
 279  * @requires AVX2
 280  */
 281
 282 void ec_encode_data_update_avx2(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
 283                                 unsigned char *data, unsigned char **coding);
 284
 285 /**
 286  * @brief GF(2^8) vector dot product.
 287  *
 288  * Does a GF(2^8) dot product across each byte of the input array and a constant
 289  * set of coefficients to produce each byte of the output. Can be used for
 290  * erasure coding encode and decode. Function requires pre-calculation of a
 291  * 32*vlen byte constant array based on the input coefficients.
 292  * @requires SSE4.1
 293  *
 294  * @param len    Length of each vector in bytes. Must be >= 16.
 295  * @param vlen   Number of vector sources.
 296  * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
 297  *               on the array of input coefficients.
 298  * @param src    Array of pointers to source inputs.
 299  * @param dest   Pointer to destination data array.
 300  * @returns none
 301  */
 302
 303 void gf_vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
 304                         unsigned char **src, unsigned char *dest);
 305
 306 /**
 307  * @brief GF(2^8) vector dot product.
 308  *
 309  * Does a GF(2^8) dot product across each byte of the input array and a constant
 310  * set of coefficients to produce each byte of the output. Can be used for
 311  * erasure coding encode and decode. Function requires pre-calculation of a
 312  * 32*vlen byte constant array based on the input coefficients.
 313  * @requires AVX
 314  *
 315  * @param len    Length of each vector in bytes. Must be >= 16.
 316  * @param vlen   Number of vector sources.
 317  * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
 318  *               on the array of input coefficients.
 319  * @param src    Array of pointers to source inputs.
 320  * @param dest   Pointer to destination data array.
 321  * @returns none
 322  */
 323
 324 void gf_vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
 325                         unsigned char **src, unsigned char *dest);
 326
 327 /**
 328  * @brief GF(2^8) vector dot product.
 329  *
 330  * Does a GF(2^8) dot product across each byte of the input array and a constant
 331  * set of coefficients to produce each byte of the output. Can be used for
 332  * erasure coding encode and decode. Function requires pre-calculation of a
 333  * 32*vlen byte constant array based on the input coefficients.
 334  * @requires AVX2
 335  *
 336  * @param len    Length of each vector in bytes. Must be >= 32.
 337  * @param vlen   Number of vector sources.
 338  * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
 339  *               on the array of input coefficients.
 340  * @param src    Array of pointers to source inputs.
 341  * @param dest   Pointer to destination data array.
 342  * @returns none
 343  */
 344
 345 void gf_vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
 346                         unsigned char **src, unsigned char *dest);
 347
 348 /**
 349  * @brief GF(2^8) vector dot product with two outputs.
 350  *
 351  * Vector dot product optimized to calculate two outputs at a time. Does two
 352  * GF(2^8) dot products across each byte of the input array and two constant
 353  * sets of coefficients to produce each byte of the outputs. Can be used for
 354  * erasure coding encode and decode. Function requires pre-calculation of a
 355  * 2*32*vlen byte constant array based on the two sets of input coefficients.
 356  * @requires SSE4.1
 357  *
 358  * @param len    Length of each vector in bytes. Must be >= 16.
 359  * @param vlen   Number of vector sources.
 360  * @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants
 361  *               based on the array of input coefficients.
 362  * @param src    Array of pointers to source inputs.
 363  * @param dest   Array of pointers to destination data buffers.
 364  * @returns none
 365  */
 366
 367 void gf_2vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
 368                         unsigned char **src, unsigned char **dest);
 369
 370 /**
 371  * @brief GF(2^8) vector dot product with two outputs.
 372  *
 373  * Vector dot product optimized to calculate two outputs at a time. Does two
 374  * GF(2^8) dot products across each byte of the input array and two constant
 375  * sets of coefficients to produce each byte of the outputs. Can be used for
 376  * erasure coding encode and decode. Function requires pre-calculation of a
 377  * 2*32*vlen byte constant array based on the two sets of input coefficients.
 378  * @requires AVX
 379  *
 380  * @param len    Length of each vector in bytes. Must be >= 16.
 381  * @param vlen   Number of vector sources.
 382  * @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants
 383  *               based on the array of input coefficients.
 384  * @param src    Array of pointers to source inputs.
 385  * @param dest   Array of pointers to destination data buffers.
 386  * @returns none
 387  */
 388
 389 void gf_2vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
 390                         unsigned char **src, unsigned char **dest);
 391
 392 /**
 393  * @brief GF(2^8) vector dot product with two outputs.
 394  *
 395  * Vector dot product optimized to calculate two outputs at a time. Does two
 396  * GF(2^8) dot products across each byte of the input array and two constant
 397  * sets of coefficients to produce each byte of the outputs. Can be used for
 398  * erasure coding encode and decode. Function requires pre-calculation of a
 399  * 2*32*vlen byte constant array based on the two sets of input coefficients.
 400  * @requires AVX2
 401  *
 402  * @param len    Length of each vector in bytes. Must be >= 32.
 403  * @param vlen   Number of vector sources.
 404  * @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants
 405  *               based on the array of input coefficients.
 406  * @param src    Array of pointers to source inputs.
 407  * @param dest   Array of pointers to destination data buffers.
 408  * @returns none
 409  */
 410
 411 void gf_2vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
 412                         unsigned char **src, unsigned char **dest);
 413
 414 /**
 415  * @brief GF(2^8) vector dot product with three outputs.
 416  *
 417  * Vector dot product optimized to calculate three outputs at a time. Does three
 418  * GF(2^8) dot products across each byte of the input array and three constant
 419  * sets of coefficients to produce each byte of the outputs. Can be used for
 420  * erasure coding encode and decode. Function requires pre-calculation of a
 421  * 3*32*vlen byte constant array based on the three sets of input coefficients.
 422  * @requires SSE4.1
 423  *
 424  * @param len    Length of each vector in bytes. Must be >= 16.
 425  * @param vlen   Number of vector sources.
 426  * @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants
 427  *               based on the array of input coefficients.
 428  * @param src    Array of pointers to source inputs.
 429  * @param dest   Array of pointers to destination data buffers.
 430  * @returns none
 431  */
 432
 433 void gf_3vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
 434                         unsigned char **src, unsigned char **dest);
 435
 436 /**
 437  * @brief GF(2^8) vector dot product with three outputs.
 438  *
 439  * Vector dot product optimized to calculate three outputs at a time. Does three
 440  * GF(2^8) dot products across each byte of the input array and three constant
 441  * sets of coefficients to produce each byte of the outputs. Can be used for
 442  * erasure coding encode and decode. Function requires pre-calculation of a
 443  * 3*32*vlen byte constant array based on the three sets of input coefficients.
 444  * @requires AVX
 445  *
 446  * @param len    Length of each vector in bytes. Must be >= 16.
 447  * @param vlen   Number of vector sources.
 448  * @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants
 449  *               based on the array of input coefficients.
 450  * @param src    Array of pointers to source inputs.
 451  * @param dest   Array of pointers to destination data buffers.
 452  * @returns none
 453  */
 454
 455 void gf_3vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
 456                         unsigned char **src, unsigned char **dest);
 457
 458 /**
 459  * @brief GF(2^8) vector dot product with three outputs.
 460  *
 461  * Vector dot product optimized to calculate three outputs at a time. Does three
 462  * GF(2^8) dot products across each byte of the input array and three constant
 463  * sets of coefficients to produce each byte of the outputs. Can be used for
 464  * erasure coding encode and decode. Function requires pre-calculation of a
 465  * 3*32*vlen byte constant array based on the three sets of input coefficients.
 466  * @requires AVX2
 467  *
 468  * @param len    Length of each vector in bytes. Must be >= 32.
 469  * @param vlen   Number of vector sources.
 470  * @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants
 471  *               based on the array of input coefficients.
 472  * @param src    Array of pointers to source inputs.
 473  * @param dest   Array of pointers to destination data buffers.
 474  * @returns none
 475  */
 476
 477 void gf_3vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
 478                         unsigned char **src, unsigned char **dest);
 479
 480 /**
 481  * @brief GF(2^8) vector dot product with four outputs.
 482  *
 483  * Vector dot product optimized to calculate four outputs at a time. Does four
 484  * GF(2^8) dot products across each byte of the input array and four constant
 485  * sets of coefficients to produce each byte of the outputs. Can be used for
 486  * erasure coding encode and decode. Function requires pre-calculation of a
 487  * 4*32*vlen byte constant array based on the four sets of input coefficients.
 488  * @requires SSE4.1
 489  *
 490  * @param len    Length of each vector in bytes. Must be >= 16.
 491  * @param vlen   Number of vector sources.
 492  * @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants
 493  *               based on the array of input coefficients.
 494  * @param src    Array of pointers to source inputs.
 495  * @param dest   Array of pointers to destination data buffers.
 496  * @returns none
 497  */
 498
 499 void gf_4vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
 500                         unsigned char **src, unsigned char **dest);
 501
 502 /**
 503  * @brief GF(2^8) vector dot product with four outputs.
 504  *
 505  * Vector dot product optimized to calculate four outputs at a time. Does four
 506  * GF(2^8) dot products across each byte of the input array and four constant
 507  * sets of coefficients to produce each byte of the outputs. Can be used for
 508  * erasure coding encode and decode. Function requires pre-calculation of a
 509  * 4*32*vlen byte constant array based on the four sets of input coefficients.
 510  * @requires AVX
 511  *
 512  * @param len    Length of each vector in bytes. Must be >= 16.
 513  * @param vlen   Number of vector sources.
 514  * @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants
 515  *               based on the array of input coefficients.
 516  * @param src    Array of pointers to source inputs.
 517  * @param dest   Array of pointers to destination data buffers.
 518  * @returns none
 519  */
 520
 521 void gf_4vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
 522                         unsigned char **src, unsigned char **dest);
 523
 524 /**
 525  * @brief GF(2^8) vector dot product with four outputs.
 526  *
 527  * Vector dot product optimized to calculate four outputs at a time. Does four
 528  * GF(2^8) dot products across each byte of the input array and four constant
 529  * sets of coefficients to produce each byte of the outputs. Can be used for
 530  * erasure coding encode and decode. Function requires pre-calculation of a
 531  * 4*32*vlen byte constant array based on the four sets of input coefficients.
 532  * @requires AVX2
 533  *
 534  * @param len    Length of each vector in bytes. Must be >= 32.
 535  * @param vlen   Number of vector sources.
 536  * @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants
 537  *               based on the array of input coefficients.
 538  * @param src    Array of pointers to source inputs.
 539  * @param dest   Array of pointers to destination data buffers.
 540  * @returns none
 541  */
 542
 543 void gf_4vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
 544                         unsigned char **src, unsigned char **dest);
 545
 546 /**
 547  * @brief GF(2^8) vector dot product with five outputs.
 548  *
 549  * Vector dot product optimized to calculate five outputs at a time. Does five
 550  * GF(2^8) dot products across each byte of the input array and five constant
 551  * sets of coefficients to produce each byte of the outputs. Can be used for
 552  * erasure coding encode and decode. Function requires pre-calculation of a
 553  * 5*32*vlen byte constant array based on the five sets of input coefficients.
 554  * @requires SSE4.1
 555  *
  556  * @param len    Length of each vector in bytes. Must be >= 16.
 557  * @param vlen   Number of vector sources.
 558  * @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants
 559  *               based on the array of input coefficients.
 560  * @param src    Array of pointers to source inputs.
 561  * @param dest   Array of pointers to destination data buffers.
 562  * @returns none
 563  */
 564
 565 void gf_5vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
 566                         unsigned char **src, unsigned char **dest);
 567
 568 /**
 569  * @brief GF(2^8) vector dot product with five outputs.
 570  *
 571  * Vector dot product optimized to calculate five outputs at a time. Does five
 572  * GF(2^8) dot products across each byte of the input array and five constant
 573  * sets of coefficients to produce each byte of the outputs. Can be used for
 574  * erasure coding encode and decode. Function requires pre-calculation of a
 575  * 5*32*vlen byte constant array based on the five sets of input coefficients.
 576  * @requires AVX
 577  *
  578  * @param len    Length of each vector in bytes. Must be >= 16.
 579  * @param vlen   Number of vector sources.
 580  * @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants
 581  *               based on the array of input coefficients.
 582  * @param src    Array of pointers to source inputs.
 583  * @param dest   Array of pointers to destination data buffers.
 584  * @returns none
 585  */
 586
 587 void gf_5vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
 588                         unsigned char **src, unsigned char **dest);
 589
 590 /**
 591  * @brief GF(2^8) vector dot product with five outputs.
 592  *
 593  * Vector dot product optimized to calculate five outputs at a time. Does five
 594  * GF(2^8) dot products across each byte of the input array and five constant
 595  * sets of coefficients to produce each byte of the outputs. Can be used for
 596  * erasure coding encode and decode. Function requires pre-calculation of a
 597  * 5*32*vlen byte constant array based on the five sets of input coefficients.
 598  * @requires AVX2
 599  *
  600  * @param len    Length of each vector in bytes. Must be >= 32.
 601  * @param vlen   Number of vector sources.
 602  * @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants
 603  *               based on the array of input coefficients.
 604  * @param src    Array of pointers to source inputs.
 605  * @param dest   Array of pointers to destination data buffers.
 606  * @returns none
 607  */
 608
 609 void gf_5vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
 610                         unsigned char **src, unsigned char **dest);
 611
 612 /**
 613  * @brief GF(2^8) vector dot product with six outputs.
 614  *
 615  * Vector dot product optimized to calculate six outputs at a time. Does six
 616  * GF(2^8) dot products across each byte of the input array and six constant
 617  * sets of coefficients to produce each byte of the outputs. Can be used for
 618  * erasure coding encode and decode. Function requires pre-calculation of a
 619  * 6*32*vlen byte constant array based on the six sets of input coefficients.
 620  * @requires SSE4.1
 621  *
 622  * @param len    Length of each vector in bytes. Must be >= 16.
 623  * @param vlen   Number of vector sources.
 624  * @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants
 625  *               based on the array of input coefficients.
 626  * @param src    Array of pointers to source inputs.
 627  * @param dest   Array of pointers to destination data buffers.
 628  * @returns none
 629  */
 630
 631 void gf_6vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
 632                         unsigned char **src, unsigned char **dest);
 633
 634 /**
 635  * @brief GF(2^8) vector dot product with six outputs.
 636  *
 637  * Vector dot product optimized to calculate six outputs at a time. Does six
 638  * GF(2^8) dot products across each byte of the input array and six constant
 639  * sets of coefficients to produce each byte of the outputs. Can be used for
 640  * erasure coding encode and decode. Function requires pre-calculation of a
 641  * 6*32*vlen byte constant array based on the six sets of input coefficients.
 642  * @requires AVX
 643  *
 644  * @param len    Length of each vector in bytes. Must be >= 16.
 645  * @param vlen   Number of vector sources.
 646  * @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants
 647  *               based on the array of input coefficients.
 648  * @param src    Array of pointers to source inputs.
 649  * @param dest   Array of pointers to destination data buffers.
 650  * @returns none
 651  */
 652
 653 void gf_6vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
 654                         unsigned char **src, unsigned char **dest);
 655
 656 /**
 657  * @brief GF(2^8) vector dot product with six outputs. AVX2 version.
 658  *
 659  * Vector dot product optimized to calculate six outputs at a time. Computes six
 660  * GF(2^8) dot products across each byte of the input array, one per constant
 661  * set of coefficients, to produce each byte of the outputs. Can be used for
 662  * erasure coding encode and decode. Function requires pre-calculation of a
 663  * 6*32*vlen byte constant array based on the six sets of input coefficients.
 664  * @requires AVX2
 665  *
 666  * @param len    Length of each vector in bytes. Must be >= 32.
 667  * @param vlen   Number of vector sources.
 668  * @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants
 669  *               based on the array of input coefficients.
 670  * @param src    Array of vlen pointers to source inputs.
 671  * @param dest   Array of six pointers to destination data buffers.
 672  * @returns none
 673  */
 674
 675 void gf_6vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
 676                         unsigned char **src, unsigned char **dest);
 677
 678 /**
 679  * @brief GF(2^8) vector multiply accumulate, SSE version.
 680  *
 681  * Architecture-specific version of gf_vect_mad(); takes the same parameters.
 682  * @requires SSE4.1
 683  */
 684
 685 void gf_vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
 686                      unsigned char *dest);
 687 /**
 688  * @brief GF(2^8) vector multiply accumulate, AVX version.
 689  *
 690  * Architecture-specific version of gf_vect_mad(); takes the same parameters.
 691  * @requires AVX
 692  */
 693
 694 void gf_vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
 695                      unsigned char *dest);
 696
 697 /**
 698  * @brief GF(2^8) vector multiply accumulate, AVX2 version.
 699  *
 700  * Architecture-specific version of gf_vect_mad(); takes the same parameters.
 701  * @requires AVX2
 702  */
 703
 704 void gf_vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
 705                       unsigned char *dest);
 706
 707
 708 /**
 709  * @brief GF(2^8) vector multiply with 2 accumulate. SSE version.
 710  *
 711  * Does a GF(2^8) multiply across each byte of the input source with expanded
 712  * constants and adds the products to the destination arrays. Can be used for
 713  * erasure coding encode and decode updates when only one source is available
 714  * at a time. Function requires pre-calculation of a 32*vec byte constant array
 715  * based on the input coefficients.
 716  * @requires SSE4.1
 717  *
 718  * @param len    Length of each vector in bytes. Must be >= 32.
 719  * @param vec    The number of vector sources or rows in the generator matrix
 720  *               for coding.
 721  * @param vec_i  The vector index corresponding to the single input source.
 722  * @param gftbls Pointer to array of input tables generated from coding
 723  *               coefficients in ec_init_tables(). Must be of size 32*vec (NOTE(review): confirm for 2 outputs).
 724  * @param src    Pointer to source input array.
 725  * @param dest   Array of pointers to destination buffers (read and accumulated into).
 726  * @returns none
 727  */
 728
 729 void gf_2vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
 730                       unsigned char **dest);
 731
 732 /**
 733  * @brief GF(2^8) vector multiply with 2 accumulate. AVX version of gf_2vect_mad_sse(), same parameter list.
 734  * @requires AVX
 735  */
 736 void gf_2vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
 737                       unsigned char **dest);
 738 /**
 739  * @brief GF(2^8) vector multiply with 2 accumulate. AVX2 version of gf_2vect_mad_sse(), same parameter list.
 740  * @requires AVX2
 741  */
 742 void gf_2vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
 743                        unsigned char **dest);
 744
 745 /**
 746  * @brief GF(2^8) vector multiply with 3 accumulate. SSE version.
 747  *
 748  * Does a GF(2^8) multiply across each byte of the input source with expanded
 749  * constants and adds the products to the destination arrays. Can be used for
 750  * erasure coding encode and decode updates when only one source is available
 751  * at a time. Function requires pre-calculation of a 32*vec byte constant array
 752  * based on the input coefficients.
 753  * @requires SSE4.1
 754  *
 755  * @param len    Length of each vector in bytes. Must be >= 32.
 756  * @param vec    The number of vector sources or rows in the generator matrix
 757  *               for coding.
 758  * @param vec_i  The vector index corresponding to the single input source.
 759  * @param gftbls Pointer to array of input tables generated from coding
 760  *               coefficients in ec_init_tables(). Must be of size 32*vec (NOTE(review): confirm for 3 outputs).
 761  * @param src    Pointer to source input array.
 762  * @param dest   Array of pointers to destination buffers (read and accumulated into).
 763  * @returns none
 764  */
 765
 766 void gf_3vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
 767                       unsigned char **dest);
 768
 769 /**
 770  * @brief GF(2^8) vector multiply with 3 accumulate. AVX version of gf_3vect_mad_sse(), same parameter list.
 771  * @requires AVX
 772  */
 773 void gf_3vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
 774                       unsigned char **dest);
 775
 776 /**
 777  * @brief GF(2^8) vector multiply with 3 accumulate. AVX2 version of gf_3vect_mad_sse(), same parameter list.
 778  * @requires AVX2
 779  */
 780 void gf_3vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
 781                        unsigned char **dest);
 782
 783 /**
 784  * @brief GF(2^8) vector multiply with 4 accumulate. SSE version.
 785  *
 786  * Does a GF(2^8) multiply across each byte of the input source with expanded
 787  * constants and adds the products to the destination arrays. Can be used for
 788  * erasure coding encode and decode updates when only one source is available
 789  * at a time. Function requires pre-calculation of a 32*vec byte constant array
 790  * based on the input coefficients.
 791  * @requires SSE4.1
 792  *
 793  * @param len    Length of each vector in bytes. Must be >= 32.
 794  * @param vec    The number of vector sources or rows in the generator matrix
 795  *               for coding.
 796  * @param vec_i  The vector index corresponding to the single input source.
 797  * @param gftbls Pointer to array of input tables generated from coding
 798  *               coefficients in ec_init_tables(). Must be of size 32*vec (NOTE(review): confirm for 4 outputs).
 799  * @param src    Pointer to source input array.
 800  * @param dest   Array of pointers to destination buffers (read and accumulated into).
 801  * @returns none
 802  */
 803
 804 void gf_4vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
 805                       unsigned char **dest);
 806
 807 /**
 808  * @brief GF(2^8) vector multiply with 4 accumulate. AVX version of gf_4vect_mad_sse(), same parameter list.
 809  * @requires AVX
 810  */
 811 void gf_4vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
 812                       unsigned char **dest);
 813 /**
 814  * @brief GF(2^8) vector multiply with 4 accumulate. AVX2 version of gf_4vect_mad_sse(), same parameter list.
 815  * @requires AVX2
 816  */
 817 void gf_4vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
 818                        unsigned char **dest);
 819
 820 /**
 821  * @brief GF(2^8) vector multiply with 5 accumulate. SSE version. Same parameter list as gf_4vect_mad_sse().
 822  * @requires SSE4.1
 823  */
 824 void gf_5vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
 825                       unsigned char **dest);
 826
 827 /**
 828  * @brief GF(2^8) vector multiply with 5 accumulate. AVX version. Same parameter list as gf_4vect_mad_sse().
 829  * @requires AVX
 830  */
 831 void gf_5vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
 832                       unsigned char **dest);
 833 /**
 834  * @brief GF(2^8) vector multiply with 5 accumulate. AVX2 version. Same parameter list as gf_4vect_mad_sse().
 835  * @requires AVX2
 836  */
 837 void gf_5vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
 838                        unsigned char **dest);
 839
 840 /**
 841  * @brief GF(2^8) vector multiply with 6 accumulate. SSE version. Same parameter list as gf_4vect_mad_sse().
 842  * @requires SSE4.1
 843  */
 844 void gf_6vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
 845                       unsigned char **dest);
 846 /**
 847  * @brief GF(2^8) vector multiply with 6 accumulate. AVX version. Same parameter list as gf_4vect_mad_sse().
 848  * @requires AVX
 849  */
 850 void gf_6vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
 851                       unsigned char **dest);
 852
 853 /**
 854  * @brief GF(2^8) vector multiply with 6 accumulate. AVX2 version. Same parameter list as gf_4vect_mad_sse().
 855  * @requires AVX2
 856  */
 857 void gf_6vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
 858                        unsigned char **dest);
 859
 860 #endif
 861
 862 /**********************************************************************
 863  * The remaining declarations are library support functions used in GF(2^8) operations.
 864  */
 865
 866 /**
 867  * @brief Single element GF(2^8) multiply.
 868  *
 869  * @param a  First factor, an element of GF(2^8)
 870  * @param b  Second factor, an element of GF(2^8)
 871  * @returns  Product of a and b in GF(2^8)
 872  */
 873
 874 unsigned char gf_mul(unsigned char a, unsigned char b);
 875
 876 /**
 877  * @brief Single element GF(2^8) inverse.
 878  *
 879  * @param a  Input element (NOTE(review): result for a == 0 is not documented here - confirm)
 880  * @returns  Field element b such that a x b = {1}, i.e. the multiplicative inverse of a
 881  */
 882
 883 unsigned char gf_inv(unsigned char a);
 884
 885 /**
 886  * @brief Generate a matrix of coefficients to be used for encoding.
 887  *
 888  * Vandermonde matrix example of encoding coefficients where the high portion of
 889  * the matrix is the identity matrix I and the lower portion is constructed as
 890  * 2^{i*(j-k+1)}, i:{0,k-1} j:{k,m-1}. A commonly used method for choosing
 891  * coefficients in erasure encoding, but it does not guarantee that every
 892  * sub-matrix is invertible. For large pairs of m and k it is possible to find
 893  * cases where the decode matrix chosen from sources and parity is not invertible.
 894  * Users may want to adjust for certain pairs m and k. If m and k satisfy one of
 895  * the following conditions, no adjustment is required:
 896  *
 897  * - k <= 3
 898  * - k = 4, m <= 25
 899  * - k = 5, m <= 10
 900  * - k <= 21, m-k = 4
 901  * - m - k <= 3.
 902  *
 903  * @param a  [m x k] array to hold coefficients (output)
 904  * @param m  number of rows in matrix corresponding to srcs + parity.
 905  * @param k  number of columns in matrix corresponding to srcs.
 906  * @returns  none
 907  */
 908
 909 void gf_gen_rs_matrix(unsigned char *a, int m, int k);
 910
 911 /**
 912  * @brief Generate a Cauchy matrix of coefficients to be used for encoding.
 913  *
 914  * Cauchy matrix example of encoding coefficients where high portion of matrix
 915  * is identity matrix I and lower portion is constructed as 1/(i + j) | i != j,
 916  * i:{0,k-1} j:{k,m-1}.  Any sub-matrix of a Cauchy matrix should be invertible.
 917  *
 918  * @param a  [m x k] array to hold coefficients (output)
 919  * @param m  number of rows in matrix corresponding to srcs + parity.
 920  * @param k  number of columns in matrix corresponding to srcs.
 921  * @returns  none
 922  */
 923
 924 void gf_gen_cauchy1_matrix(unsigned char *a, int m, int k);
 925
 926 /**
 927  * @brief Invert a matrix in GF(2^8)
 928  *
 929  * @param in  input matrix (NOTE(review): pointer is not const - confirm whether it is modified)
 930  * @param out output matrix such that [in] x [out] = [I] - identity matrix
 931  * @param n   size of matrix [nxn]
 932  * @returns 0 on success, non-zero on failure (singular input matrix)
 933  */
 934
 935 int gf_invert_matrix(unsigned char *in, unsigned char *out, const int n);
 936
 937
 938 /*************************************************************/
 939
 940 #ifdef __cplusplus
 941 }
 942 #endif
 943
 944 #endif //_ERASURE_CODE_H_