]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /********************************************************************** |
2 | Copyright(c) 2011-2015 Intel Corporation All rights reserved. | |
3 | ||
4 | Redistribution and use in source and binary forms, with or without | |
5 | modification, are permitted provided that the following conditions | |
6 | are met: | |
7 | * Redistributions of source code must retain the above copyright | |
8 | notice, this list of conditions and the following disclaimer. | |
9 | * Redistributions in binary form must reproduce the above copyright | |
10 | notice, this list of conditions and the following disclaimer in | |
11 | the documentation and/or other materials provided with the | |
12 | distribution. | |
13 | * Neither the name of Intel Corporation nor the names of its | |
14 | contributors may be used to endorse or promote products derived | |
15 | from this software without specific prior written permission. | |
16 | ||
17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
28 | **********************************************************************/ | |
29 | #include <limits.h> | |
30 | #include "erasure_code.h" | |
31 | #include "types.h" | |
32 | ||
7c673cae FG |
33 | |
/*
 * Generate `rows` coding buffers from `k` source buffers using the *_sse
 * dot-product kernels.
 *
 * Inputs with len below 16 are handed unchanged to ec_encode_data_base().
 * Otherwise outputs are produced four at a time; each group of four advances
 * g_tbls by 4 * k * 32 and coding by four entries.  The 1-3 outputs that are
 * left over go to the kernel of matching width.  A non-positive row count
 * results in no kernel call.
 */
void ec_encode_data_sse(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
			unsigned char **coding)
{
	unsigned char *tbl = g_tbls;
	unsigned char **dest = coding;
	int left;

	if (len < 16) {
		ec_encode_data_base(len, k, rows, g_tbls, data, coding);
		return;
	}

	/* Full groups of four outputs. */
	for (left = rows; left >= 4; left -= 4) {
		gf_4vect_dot_prod_sse(len, k, tbl, data, dest);
		tbl += 4 * k * 32;
		dest += 4;
	}

	/* Tail: the single-output kernel takes the buffer itself, not the array. */
	if (left == 3)
		gf_3vect_dot_prod_sse(len, k, tbl, data, dest);
	else if (left == 2)
		gf_2vect_dot_prod_sse(len, k, tbl, data, dest);
	else if (left == 1)
		gf_vect_dot_prod_sse(len, k, tbl, data, *dest);
}
64 | ||
/*
 * Generate `rows` coding buffers from `k` source buffers using the *_avx
 * dot-product kernels.
 *
 * Inputs with len below 16 are handed unchanged to ec_encode_data_base().
 * Otherwise outputs are produced four at a time; each group of four advances
 * g_tbls by 4 * k * 32 and coding by four entries.  The 1-3 outputs that are
 * left over go to the kernel of matching width.  A non-positive row count
 * results in no kernel call.
 */
void ec_encode_data_avx(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
			unsigned char **coding)
{
	unsigned char *tbl = g_tbls;
	unsigned char **dest = coding;
	int left;

	if (len < 16) {
		ec_encode_data_base(len, k, rows, g_tbls, data, coding);
		return;
	}

	/* Full groups of four outputs. */
	for (left = rows; left >= 4; left -= 4) {
		gf_4vect_dot_prod_avx(len, k, tbl, data, dest);
		tbl += 4 * k * 32;
		dest += 4;
	}

	/* Tail: the single-output kernel takes the buffer itself, not the array. */
	if (left == 3)
		gf_3vect_dot_prod_avx(len, k, tbl, data, dest);
	else if (left == 2)
		gf_2vect_dot_prod_avx(len, k, tbl, data, dest);
	else if (left == 1)
		gf_vect_dot_prod_avx(len, k, tbl, data, *dest);
}
94 | ||
/*
 * Generate `rows` coding buffers from `k` source buffers using the *_avx2
 * dot-product kernels.
 *
 * Inputs with len below 32 are handed unchanged to ec_encode_data_base().
 * Otherwise outputs are produced four at a time; each group of four advances
 * g_tbls by 4 * k * 32 and coding by four entries.  The 1-3 outputs that are
 * left over go to the kernel of matching width.  A non-positive row count
 * results in no kernel call.
 */
void ec_encode_data_avx2(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
			 unsigned char **coding)
{
	unsigned char *tbl = g_tbls;
	unsigned char **dest = coding;
	int left;

	if (len < 32) {
		ec_encode_data_base(len, k, rows, g_tbls, data, coding);
		return;
	}

	/* Full groups of four outputs. */
	for (left = rows; left >= 4; left -= 4) {
		gf_4vect_dot_prod_avx2(len, k, tbl, data, dest);
		tbl += 4 * k * 32;
		dest += 4;
	}

	/* Tail: the single-output kernel takes the buffer itself, not the array. */
	if (left == 3)
		gf_3vect_dot_prod_avx2(len, k, tbl, data, dest);
	else if (left == 2)
		gf_2vect_dot_prod_avx2(len, k, tbl, data, dest);
	else if (left == 1)
		gf_vect_dot_prod_avx2(len, k, tbl, data, *dest);
}
125 | ||
126 | #ifdef HAVE_AS_KNOWS_AVX512 | |
127 | ||
128 | extern int gf_vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls, unsigned char **data, | |
129 | unsigned char *dest); | |
130 | extern int gf_2vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls, | |
131 | unsigned char **data, unsigned char **coding); | |
132 | extern int gf_3vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls, | |
133 | unsigned char **data, unsigned char **coding); | |
134 | extern int gf_4vect_dot_prod_avx512(int len, int k, unsigned char *g_tbls, | |
135 | unsigned char **data, unsigned char **coding); | |
136 | extern void gf_vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls, | |
137 | unsigned char *src, unsigned char *dest); | |
138 | extern void gf_2vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls, | |
139 | unsigned char *src, unsigned char **dest); | |
140 | extern void gf_3vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls, | |
141 | unsigned char *src, unsigned char **dest); | |
142 | extern void gf_4vect_mad_avx512(int len, int vec, int vec_i, unsigned char *gftbls, | |
143 | unsigned char *src, unsigned char **dest); | |
144 | ||
/*
 * Generate `rows` coding buffers from `k` source buffers using the *_avx512
 * dot-product kernels.
 *
 * Inputs with len below 64 are handed unchanged to ec_encode_data_base().
 * Otherwise outputs are produced four at a time; each group of four advances
 * g_tbls by 4 * k * 32 and coding by four entries.  The 1-3 outputs that are
 * left over go to the kernel of matching width.  A non-positive row count
 * results in no kernel call.
 */
void ec_encode_data_avx512(int len, int k, int rows, unsigned char *g_tbls,
			   unsigned char **data, unsigned char **coding)
{
	unsigned char *tbl = g_tbls;
	unsigned char **dest = coding;
	int left;

	if (len < 64) {
		ec_encode_data_base(len, k, rows, g_tbls, data, coding);
		return;
	}

	/* Full groups of four outputs. */
	for (left = rows; left >= 4; left -= 4) {
		gf_4vect_dot_prod_avx512(len, k, tbl, data, dest);
		tbl += 4 * k * 32;
		dest += 4;
	}

	/* Tail: the single-output kernel takes the buffer itself, not the array. */
	if (left == 3)
		gf_3vect_dot_prod_avx512(len, k, tbl, data, dest);
	else if (left == 2)
		gf_2vect_dot_prod_avx512(len, k, tbl, data, dest);
	else if (left == 1)
		gf_vect_dot_prod_avx512(len, k, tbl, data, *dest);
}
174 | ||
/*
 * Apply one source buffer (`data`, at index vec_i among k sources) to `rows`
 * coding buffers using the *_mad_avx512 kernels.
 *
 * Inputs with len below 64 are handed unchanged to
 * ec_encode_data_update_base().  Otherwise outputs are processed four at a
 * time; each group of four advances g_tbls by 4 * k * 32 and coding by four
 * entries.  The 1-3 outputs that are left over go to the kernel of matching
 * width.  A non-positive row count results in no kernel call.
 */
void ec_encode_data_update_avx512(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
				  unsigned char *data, unsigned char **coding)
{
	unsigned char *tbl = g_tbls;
	unsigned char **dest = coding;
	int left;

	if (len < 64) {
		ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
		return;
	}

	/* Full groups of four outputs. */
	for (left = rows; left >= 4; left -= 4) {
		gf_4vect_mad_avx512(len, k, vec_i, tbl, data, dest);
		tbl += 4 * k * 32;
		dest += 4;
	}

	/* Tail: the single-output kernel takes the buffer itself, not the array. */
	if (left == 3)
		gf_3vect_mad_avx512(len, k, vec_i, tbl, data, dest);
	else if (left == 2)
		gf_2vect_mad_avx512(len, k, vec_i, tbl, data, dest);
	else if (left == 1)
		gf_vect_mad_avx512(len, k, vec_i, tbl, data, *dest);
}
203 | ||
204 | #endif // HAVE_AS_KNOWS_AVX512 | |
205 | ||
206 | #if __WORDSIZE == 64 || _WIN64 || __x86_64__ | |
207 | ||
/*
 * Apply one source buffer (`data`, at index vec_i among k sources) to `rows`
 * coding buffers using the *_mad_sse kernels.
 *
 * Inputs with len below 16 are handed unchanged to
 * ec_encode_data_update_base().  Otherwise, while more than six outputs
 * remain, they are processed six at a time; each such group advances g_tbls
 * by 6 * k * 32 and coding by six entries.  The final 1-6 outputs go to the
 * kernel of matching width.  A non-positive row count results in no kernel
 * call.
 */
void ec_encode_data_update_sse(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
			       unsigned char *data, unsigned char **coding)
{
	unsigned char *tbl = g_tbls;
	unsigned char **dest = coding;
	int left;

	if (len < 16) {
		ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
		return;
	}

	/* Groups of six while strictly more than six outputs remain. */
	for (left = rows; left > 6; left -= 6) {
		gf_6vect_mad_sse(len, k, vec_i, tbl, data, dest);
		tbl += 6 * k * 32;
		dest += 6;
	}

	/* Tail: the single-output kernel takes the buffer itself, not the array. */
	if (left == 6)
		gf_6vect_mad_sse(len, k, vec_i, tbl, data, dest);
	else if (left == 5)
		gf_5vect_mad_sse(len, k, vec_i, tbl, data, dest);
	else if (left == 4)
		gf_4vect_mad_sse(len, k, vec_i, tbl, data, dest);
	else if (left == 3)
		gf_3vect_mad_sse(len, k, vec_i, tbl, data, dest);
	else if (left == 2)
		gf_2vect_mad_sse(len, k, vec_i, tbl, data, dest);
	else if (left == 1)
		gf_vect_mad_sse(len, k, vec_i, tbl, data, *dest);
}
246 | ||
/*
 * Apply one source buffer (`data`, at index vec_i among k sources) to `rows`
 * coding buffers using the *_mad_avx kernels.
 *
 * Inputs with len below 16 are handed unchanged to
 * ec_encode_data_update_base().  Otherwise, while more than six outputs
 * remain, they are processed six at a time; each such group advances g_tbls
 * by 6 * k * 32 and coding by six entries.  The final 1-6 outputs go to the
 * kernel of matching width.  A non-positive row count results in no kernel
 * call.
 */
void ec_encode_data_update_avx(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
			       unsigned char *data, unsigned char **coding)
{
	unsigned char *tbl = g_tbls;
	unsigned char **dest = coding;
	int left;

	if (len < 16) {
		ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
		return;
	}

	/* Groups of six while strictly more than six outputs remain. */
	for (left = rows; left > 6; left -= 6) {
		gf_6vect_mad_avx(len, k, vec_i, tbl, data, dest);
		tbl += 6 * k * 32;
		dest += 6;
	}

	/* Tail: the single-output kernel takes the buffer itself, not the array. */
	if (left == 6)
		gf_6vect_mad_avx(len, k, vec_i, tbl, data, dest);
	else if (left == 5)
		gf_5vect_mad_avx(len, k, vec_i, tbl, data, dest);
	else if (left == 4)
		gf_4vect_mad_avx(len, k, vec_i, tbl, data, dest);
	else if (left == 3)
		gf_3vect_mad_avx(len, k, vec_i, tbl, data, dest);
	else if (left == 2)
		gf_2vect_mad_avx(len, k, vec_i, tbl, data, dest);
	else if (left == 1)
		gf_vect_mad_avx(len, k, vec_i, tbl, data, *dest);
}
284 | ||
/*
 * Apply one source buffer (`data`, at index vec_i among k sources) to `rows`
 * coding buffers using the *_mad_avx2 kernels.
 *
 * Inputs with len below 32 are handed unchanged to
 * ec_encode_data_update_base().  Otherwise, while more than six outputs
 * remain, they are processed six at a time; each such group advances g_tbls
 * by 6 * k * 32 and coding by six entries.  The final 1-6 outputs go to the
 * kernel of matching width.  A non-positive row count results in no kernel
 * call.
 */
void ec_encode_data_update_avx2(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
				unsigned char *data, unsigned char **coding)
{
	unsigned char *tbl = g_tbls;
	unsigned char **dest = coding;
	int left;

	if (len < 32) {
		ec_encode_data_update_base(len, k, rows, vec_i, g_tbls, data, coding);
		return;
	}

	/* Groups of six while strictly more than six outputs remain. */
	for (left = rows; left > 6; left -= 6) {
		gf_6vect_mad_avx2(len, k, vec_i, tbl, data, dest);
		tbl += 6 * k * 32;
		dest += 6;
	}

	/* Tail: the single-output kernel takes the buffer itself, not the array. */
	if (left == 6)
		gf_6vect_mad_avx2(len, k, vec_i, tbl, data, dest);
	else if (left == 5)
		gf_5vect_mad_avx2(len, k, vec_i, tbl, data, dest);
	else if (left == 4)
		gf_4vect_mad_avx2(len, k, vec_i, tbl, data, dest);
	else if (left == 3)
		gf_3vect_mad_avx2(len, k, vec_i, tbl, data, dest);
	else if (left == 2)
		gf_2vect_mad_avx2(len, k, vec_i, tbl, data, dest);
	else if (left == 1)
		gf_vect_mad_avx2(len, k, vec_i, tbl, data, *dest);
}
322 | ||
323 | #endif //__WORDSIZE == 64 || _WIN64 || __x86_64__ | |
324 | ||
/*
 * Per-function version record: one 16-bit field followed by two 8-bit fields.
 * NOTE(review): the field meanings are not defined in this file; from the
 * names they are presumably a serial number, a version and a core/variant
 * id -- confirm against the project's versioning convention.
 */
struct slver {
	UINT16 snum;
	UINT8 ver;
	UINT8 core;
};

// Version info
// Each function gets two objects: a tentative definition whose name ends in
// eight hex digits, and an initialized record.  In both entries below the
// suffix digits match the initializer read as core, ver, snum
// (e.g. 00 01 0068 <-> { 0x0068, 0x01, 0x00 }).
struct slver ec_init_tables_slver_00010068;
struct slver ec_init_tables_slver = { 0x0068, 0x01, 0x00 };

struct slver ec_encode_data_sse_slver_00020069;
struct slver ec_encode_data_sse_slver = { 0x0069, 0x02, 0x00 };