]> git.proxmox.com Git - ceph.git/blob - ceph/src/crypto/isa-l/isa-l_crypto/include/memcpy_inline.h
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / crypto / isa-l / isa-l_crypto / include / memcpy_inline.h
1 /**********************************************************************
2 Copyright(c) 2011-2016 Intel Corporation All rights reserved.
3
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
7 * Redistributions of source code must retain the above copyright
8 notice, this list of conditions and the following disclaimer.
9 * Redistributions in binary form must reproduce the above copyright
10 notice, this list of conditions and the following disclaimer in
11 the documentation and/or other materials provided with the
12 distribution.
13 * Neither the name of Intel Corporation nor the names of its
14 contributors may be used to endorse or promote products derived
15 from this software without specific prior written permission.
16
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 **********************************************************************/
29
30
31 /**
32 * @file memcpy_inline.h
33 * @brief Defines intrinsic memcpy functions used by the new hashing API
34 *
35 */
36
37 #ifndef _MEMCPY_H_
38 #define _MEMCPY_H_
39
40 #if defined(__i386__) || defined(__x86_64__) || defined( _M_X64) \
41 || defined(_M_IX86)
42 #include "intrinreg.h"
43 #endif
44 #include <string.h>
45 #include <assert.h>
46
47 #ifdef __cplusplus
48 extern "C" {
49 #endif
50
51 #if defined(__i386__) || defined(__x86_64__) || defined( _M_X64) \
52 || defined(_M_IX86)
53
/* On x86 the generic copy/clear names resolve to the SSE helpers in this header. */
#define memcpy_varlen   memcpy_sse_varlen
#define memcpy_fixedlen memcpy_sse_fixedlen

#define memclr_varlen   memclr_sse_varlen
#define memclr_fixedlen memclr_sse_fixedlen
59
/* Forward declarations: copy helpers (definitions follow later in this header). */
static inline void memcpy_lte32_sse_fixedlen(void *dst, const void *src, size_t nbytes);
static inline void memcpy_gte16_sse_fixedlen(void *dst, const void *src, size_t nbytes);
static inline void memcpy_sse_fixedlen(void *dst, const void *src, size_t nbytes);

static inline void memcpy_lte32_sse_varlen(void *dst, const void *src, size_t nbytes);
static inline void memcpy_gte16_sse_varlen(void *dst, const void *src, size_t nbytes);
static inline void memcpy_sse_varlen(void *dst, const void *src, size_t nbytes);

/* Forward declarations: zero-fill helpers (definitions follow later in this header). */
static inline void memclr_lte32_sse_fixedlen(void *dst, size_t nbytes);
static inline void memclr_gte16_sse_fixedlen(void *dst, size_t nbytes);
static inline void memclr_sse_fixedlen(void *dst, size_t nbytes);

static inline void memclr_lte32_sse_varlen(void *dst, size_t nbytes);
static inline void memclr_gte16_sse_varlen(void *dst, size_t nbytes);
static inline void memclr_sse_varlen(void *dst, size_t nbytes);
76
/*
 * Copy nbytes bytes from src to dst, where N <= nbytes <= 2*N.
 *
 * N must be a literal width for which intrinreg<N>, load_intrinreg<N> and
 * store_intrinreg<N> exist (it is token-pasted into those names).
 * One N-byte load/store covers the first N bytes; unless N == 1, or
 * fixedwidth is non-zero and nbytes == N, a second N-byte load/store that
 * ends exactly at nbytes covers the rest (the two may overlap).
 * Both loads are issued before either store.
 *
 * dst, src and nbytes are each evaluated exactly once, and the internal
 * names carry a trailing underscore to avoid clashing with caller locals.
 */
#define MEMCPY_BETWEEN_N_AND_2N_BYTES(N, fixedwidth, dst, src, nbytes) \
	do { \
		const char *mcpy_src_ = (const char *)(src); \
		char *mcpy_dst_ = (char *)(dst); \
		const size_t mcpy_len_ = (nbytes); \
		intrinreg##N mcpy_head_; \
		intrinreg##N mcpy_tail_; \
		assert((N) <= mcpy_len_ && mcpy_len_ <= 2 * (N)); \
		mcpy_head_ = load_intrinreg##N(mcpy_src_); \
		if ((N) == 1 || ((fixedwidth) && mcpy_len_ == (N))) { \
			store_intrinreg##N(mcpy_dst_, mcpy_head_); \
		} else { \
			mcpy_tail_ = load_intrinreg##N((mcpy_src_ + (mcpy_len_ - (N)))); \
			store_intrinreg##N(mcpy_dst_, mcpy_head_); \
			store_intrinreg##N((mcpy_dst_ + (mcpy_len_ - (N))), mcpy_tail_); \
		} \
	} while (0)
93
/*
 * Zero nbytes bytes at dst, where N <= nbytes <= 2*N.
 *
 * N must be a literal width for which intrinreg<N> and store_intrinreg<N>
 * exist (it is token-pasted into those names).
 * One N-byte store clears the first N bytes; unless N == 1, or fixedwidth is
 * non-zero and nbytes == N, a second N-byte store that ends exactly at
 * nbytes clears the rest (the two may overlap).
 *
 * dst and nbytes are each evaluated exactly once, and the internal names
 * carry a trailing underscore to avoid clashing with caller locals.
 */
#define MEMCLR_BETWEEN_N_AND_2N_BYTES(N, fixedwidth, dst, nbytes) \
	do { \
		char *mclr_dst_ = (char *)(dst); \
		const size_t mclr_len_ = (nbytes); \
		const intrinreg##N mclr_zero_ = {0}; \
		assert((N) <= mclr_len_ && mclr_len_ <= 2 * (N)); \
		store_intrinreg##N(mclr_dst_, mclr_zero_); \
		if (!((N) == 1 || ((fixedwidth) && mclr_len_ == (N)))) { \
			store_intrinreg##N((mclr_dst_ + (mclr_len_ - (N))), mclr_zero_); \
		} \
	} while (0)
106
107 // Define load/store functions uniformly.
108
/*
 * 16-byte load/store through the unaligned SSE intrinsics.
 * Arguments are parenthesized so that a pointer expression such as `p + 4`
 * is cast as a whole instead of having only `p` cast to a float pointer.
 */
#define load_intrinreg16(src)       _mm_loadu_ps((const float *)(src))
#define store_intrinreg16(dst, val) _mm_storeu_ps((float *)(dst), (val))
111
112 static inline intrinreg8 load_intrinreg8(const void *src)
113 {
114 return *(intrinreg8 *) src;
115 }
116
117 static inline void store_intrinreg8(void *dst, intrinreg8 val)
118 {
119 *(intrinreg8 *) dst = val;
120 }
121
122 static inline intrinreg4 load_intrinreg4(const void *src)
123 {
124 return *(intrinreg4 *) src;
125 }
126
127 static inline void store_intrinreg4(void *dst, intrinreg4 val)
128 {
129 *(intrinreg4 *) dst = val;
130 }
131
132 static inline intrinreg2 load_intrinreg2(const void *src)
133 {
134 return *(intrinreg2 *) src;
135 }
136
137 static inline void store_intrinreg2(void *dst, intrinreg2 val)
138 {
139 *(intrinreg2 *) dst = val;
140 }
141
142 static inline intrinreg1 load_intrinreg1(const void *src)
143 {
144 return *(intrinreg1 *) src;
145 }
146
147 static inline void store_intrinreg1(void *dst, intrinreg1 val)
148 {
149 *(intrinreg1 *) dst = val;
150 }
151
152 static inline void memcpy_gte16_sse_fixedlen(void *dst, const void *src, size_t nbytes)
153 {
154 size_t i;
155 size_t j;
156 intrinreg16 pool[4];
157 size_t remaining_moves;
158 size_t tail_offset;
159 int do_tail;
160 assert(nbytes >= 16);
161
162 for (i = 0; i + 16 * 4 <= nbytes; i += 16 * 4) {
163 for (j = 0; j < 4; j++)
164 pool[j] =
165 load_intrinreg16((const void *)((const char *)src + i + 16 * j));
166 for (j = 0; j < 4; j++)
167 store_intrinreg16((void *)((char *)dst + i + 16 * j), pool[j]);
168 }
169
170 remaining_moves = (nbytes - i) / 16;
171 tail_offset = nbytes - 16;
172 do_tail = (tail_offset & (16 - 1));
173
174 for (j = 0; j < remaining_moves; j++)
175 pool[j] = load_intrinreg16((const void *)((const char *)src + i + 16 * j));
176
177 if (do_tail)
178 pool[j] = load_intrinreg16((const void *)((const char *)src + tail_offset));
179
180 for (j = 0; j < remaining_moves; j++)
181 store_intrinreg16((void *)((char *)dst + i + 16 * j), pool[j]);
182
183 if (do_tail)
184 store_intrinreg16((void *)((char *)dst + tail_offset), pool[j]);
185 }
186
187 static inline void memclr_gte16_sse_fixedlen(void *dst, size_t nbytes)
188 {
189 size_t i;
190 size_t j;
191 const intrinreg16 zero = { 0 };
192 size_t remaining_moves;
193 size_t tail_offset;
194 int do_tail;
195 assert(nbytes >= 16);
196
197 for (i = 0; i + 16 * 4 <= nbytes; i += 16 * 4)
198 for (j = 0; j < 4; j++)
199 store_intrinreg16((void *)((char *)dst + i + 16 * j), zero);
200
201 remaining_moves = (nbytes - i) / 16;
202 tail_offset = nbytes - 16;
203 do_tail = (tail_offset & (16 - 1));
204
205 for (j = 0; j < remaining_moves; j++)
206 store_intrinreg16((void *)((char *)dst + i + 16 * j), zero);
207
208 if (do_tail)
209 store_intrinreg16((void *)((char *)dst + tail_offset), zero);
210 }
211
/**
 * Copy at most 32 bytes from src to dst (fixed-width flavour).
 *
 * Picks the register width N in {1, 2, 4, 8, 16} with N <= nbytes < 2*N
 * (N = 16 for every nbytes >= 16) and hands the copy to
 * MEMCPY_BETWEEN_N_AND_2N_BYTES with fixedwidth = 1. Nothing is copied when
 * nbytes is 0.
 *
 * @param dst    destination buffer
 * @param src    source buffer
 * @param nbytes number of bytes to copy; asserted to be <= 32
 */
static inline void memcpy_lte32_sse_fixedlen(void *dst, const void *src, size_t nbytes)
{
	assert(nbytes <= 32);

	if (nbytes == 0) {
		return;
	}

	if (nbytes < 2) {
		MEMCPY_BETWEEN_N_AND_2N_BYTES(1, 1, dst, src, nbytes);
	} else if (nbytes < 4) {
		MEMCPY_BETWEEN_N_AND_2N_BYTES(2, 1, dst, src, nbytes);
	} else if (nbytes < 8) {
		MEMCPY_BETWEEN_N_AND_2N_BYTES(4, 1, dst, src, nbytes);
	} else if (nbytes < 16) {
		MEMCPY_BETWEEN_N_AND_2N_BYTES(8, 1, dst, src, nbytes);
	} else {
		MEMCPY_BETWEEN_N_AND_2N_BYTES(16, 1, dst, src, nbytes);
	}
}
226
/**
 * Zero at most 32 bytes at dst (fixed-width flavour).
 *
 * Picks the register width N in {1, 2, 4, 8, 16} with N <= nbytes < 2*N
 * (N = 16 for every nbytes >= 16) and hands the clear to
 * MEMCLR_BETWEEN_N_AND_2N_BYTES with fixedwidth = 1. Nothing is written when
 * nbytes is 0.
 *
 * @param dst    buffer to clear
 * @param nbytes number of bytes to clear; asserted to be <= 32
 */
static inline void memclr_lte32_sse_fixedlen(void *dst, size_t nbytes)
{
	assert(nbytes <= 32);

	if (nbytes == 0) {
		return;
	}

	if (nbytes < 2) {
		MEMCLR_BETWEEN_N_AND_2N_BYTES(1, 1, dst, nbytes);
	} else if (nbytes < 4) {
		MEMCLR_BETWEEN_N_AND_2N_BYTES(2, 1, dst, nbytes);
	} else if (nbytes < 8) {
		MEMCLR_BETWEEN_N_AND_2N_BYTES(4, 1, dst, nbytes);
	} else if (nbytes < 16) {
		MEMCLR_BETWEEN_N_AND_2N_BYTES(8, 1, dst, nbytes);
	} else {
		MEMCLR_BETWEEN_N_AND_2N_BYTES(16, 1, dst, nbytes);
	}
}
241
/**
 * Copy at most 32 bytes from src to dst (variable-length flavour).
 *
 * Picks the register width N in {1, 2, 4, 8, 16} with N <= nbytes < 2*N
 * (N = 16 for every nbytes >= 16) and hands the copy to
 * MEMCPY_BETWEEN_N_AND_2N_BYTES with fixedwidth = 0, so for N > 1 both the
 * head and the tail register are always moved. Nothing is copied when
 * nbytes is 0.
 *
 * @param dst    destination buffer
 * @param src    source buffer
 * @param nbytes number of bytes to copy; asserted to be <= 32
 */
static inline void memcpy_lte32_sse_varlen(void *dst, const void *src, size_t nbytes)
{
	assert(nbytes <= 32);

	if (nbytes == 0) {
		return;
	}

	if (nbytes < 2) {
		MEMCPY_BETWEEN_N_AND_2N_BYTES(1, 0, dst, src, nbytes);
	} else if (nbytes < 4) {
		MEMCPY_BETWEEN_N_AND_2N_BYTES(2, 0, dst, src, nbytes);
	} else if (nbytes < 8) {
		MEMCPY_BETWEEN_N_AND_2N_BYTES(4, 0, dst, src, nbytes);
	} else if (nbytes < 16) {
		MEMCPY_BETWEEN_N_AND_2N_BYTES(8, 0, dst, src, nbytes);
	} else {
		MEMCPY_BETWEEN_N_AND_2N_BYTES(16, 0, dst, src, nbytes);
	}
}
256
/**
 * Zero at most 32 bytes at dst (variable-length flavour).
 *
 * Picks the register width N in {1, 2, 4, 8, 16} with N <= nbytes < 2*N
 * (N = 16 for every nbytes >= 16) and hands the clear to
 * MEMCLR_BETWEEN_N_AND_2N_BYTES with fixedwidth = 0, so for N > 1 both the
 * head and the tail store are always issued. Nothing is written when
 * nbytes is 0.
 *
 * @param dst    buffer to clear
 * @param nbytes number of bytes to clear; asserted to be <= 32
 */
static inline void memclr_lte32_sse_varlen(void *dst, size_t nbytes)
{
	assert(nbytes <= 32);

	if (nbytes == 0) {
		return;
	}

	if (nbytes < 2) {
		MEMCLR_BETWEEN_N_AND_2N_BYTES(1, 0, dst, nbytes);
	} else if (nbytes < 4) {
		MEMCLR_BETWEEN_N_AND_2N_BYTES(2, 0, dst, nbytes);
	} else if (nbytes < 8) {
		MEMCLR_BETWEEN_N_AND_2N_BYTES(4, 0, dst, nbytes);
	} else if (nbytes < 16) {
		MEMCLR_BETWEEN_N_AND_2N_BYTES(8, 0, dst, nbytes);
	} else {
		MEMCLR_BETWEEN_N_AND_2N_BYTES(16, 0, dst, nbytes);
	}
}
271
272 static inline void memcpy_gte16_sse_varlen(void *dst, const void *src, size_t nbytes)
273 {
274 size_t i = 0;
275 intrinreg16 tail;
276
277 assert(nbytes >= 16);
278
279 while (i + 128 <= nbytes) {
280 memcpy_gte16_sse_fixedlen((void *)((char *)dst + i),
281 (const void *)((const char *)src + i), 128);
282 i += 128;
283 }
284 if (i + 64 <= nbytes) {
285 memcpy_gte16_sse_fixedlen((void *)((char *)dst + i),
286 (const void *)((const char *)src + i), 64);
287 i += 64;
288 }
289 if (i + 32 <= nbytes) {
290 memcpy_gte16_sse_fixedlen((void *)((char *)dst + i),
291 (const void *)((const char *)src + i), 32);
292 i += 32;
293 }
294 if (i + 16 <= nbytes) {
295 memcpy_gte16_sse_fixedlen((void *)((char *)dst + i),
296 (const void *)((const char *)src + i), 16);
297 }
298
299 i = nbytes - 16;
300 tail = load_intrinreg16((const void *)((const char *)src + i));
301 store_intrinreg16((void *)((char *)dst + i), tail);
302 }
303
304 static inline void memclr_gte16_sse_varlen(void *dst, size_t nbytes)
305 {
306 size_t i = 0;
307 const intrinreg16 zero = { 0 };
308
309 assert(nbytes >= 16);
310
311 while (i + 128 <= nbytes) {
312 memclr_gte16_sse_fixedlen((void *)((char *)dst + i), 128);
313 i += 128;
314 }
315 if (i + 64 <= nbytes) {
316 memclr_gte16_sse_fixedlen((void *)((char *)dst + i), 64);
317 i += 64;
318 }
319 if (i + 32 <= nbytes) {
320 memclr_gte16_sse_fixedlen((void *)((char *)dst + i), 32);
321 i += 32;
322 }
323 if (i + 16 <= nbytes) {
324 memclr_gte16_sse_fixedlen((void *)((char *)dst + i), 16);
325 }
326
327 i = nbytes - 16;
328 store_intrinreg16((void *)((char *)dst + i), zero);
329 }
330
/**
 * Copy nbytes bytes from src to dst (fixed-length entry point).
 *
 * Lengths below 16 go to memcpy_lte32_sse_fixedlen; everything else goes to
 * memcpy_gte16_sse_fixedlen.
 *
 * @param dst    destination buffer
 * @param src    source buffer
 * @param nbytes number of bytes to copy
 */
static inline void memcpy_sse_fixedlen(void *dst, const void *src, size_t nbytes)
{
	if (nbytes < 16) {
		memcpy_lte32_sse_fixedlen(dst, src, nbytes);
		return;
	}
	memcpy_gte16_sse_fixedlen(dst, src, nbytes);
}
338
/**
 * Zero nbytes bytes at dst (fixed-length entry point).
 *
 * Lengths below 16 go to memclr_lte32_sse_fixedlen; everything else goes to
 * memclr_gte16_sse_fixedlen.
 *
 * @param dst    buffer to clear
 * @param nbytes number of bytes to clear
 */
static inline void memclr_sse_fixedlen(void *dst, size_t nbytes)
{
	if (nbytes < 16) {
		memclr_lte32_sse_fixedlen(dst, nbytes);
		return;
	}
	memclr_gte16_sse_fixedlen(dst, nbytes);
}
346
/**
 * Copy nbytes bytes from src to dst (variable-length entry point).
 *
 * Lengths below 16 go to memcpy_lte32_sse_varlen; everything else goes to
 * memcpy_gte16_sse_varlen.
 *
 * @param dst    destination buffer
 * @param src    source buffer
 * @param nbytes number of bytes to copy
 */
static inline void memcpy_sse_varlen(void *dst, const void *src, size_t nbytes)
{
	if (nbytes < 16) {
		memcpy_lte32_sse_varlen(dst, src, nbytes);
		return;
	}
	memcpy_gte16_sse_varlen(dst, src, nbytes);
}
354
/**
 * Zero nbytes bytes at dst (variable-length entry point).
 *
 * Lengths below 16 go to memclr_lte32_sse_varlen; everything else goes to
 * memclr_gte16_sse_varlen.
 *
 * @param dst    buffer to clear
 * @param nbytes number of bytes to clear
 */
static inline void memclr_sse_varlen(void *dst, size_t nbytes)
{
	if (nbytes < 16) {
		memclr_lte32_sse_varlen(dst, nbytes);
		return;
	}
	memclr_gte16_sse_varlen(dst, nbytes);
}
362 #else
/* Non-x86 targets: defer to the C library for both copy flavours. */
#define memcpy_fixedlen memcpy
#define memcpy_varlen   memcpy

/* Non-x86 targets: clearing is a memset with a zero fill byte. */
#define memclr_fixedlen(dst, n) memset((dst), 0, (n))
#define memclr_varlen(dst, n)   memset((dst), 0, (n))
368
369 #endif
370
371 #ifdef __cplusplus
372 }
373 #endif
374
#endif // _MEMCPY_H_