]>
git.proxmox.com Git - ceph.git/blob - ceph/src/pmdk/src/libpmem2/x86_64/memcpy/memcpy_t_sse2.c
1 // SPDX-License-Identifier: BSD-3-Clause
2 /* Copyright 2017-2020, Intel Corporation */
8 #include "pmem2_arch.h"
10 #include "memcpy_memset.h"
11 #include "memcpy_sse2.h"
14 static force_inline __m128i
15 mm_loadu_si128(const char *src
, unsigned idx
)
17 return _mm_loadu_si128((const __m128i
*)src
+ idx
);
20 static force_inline
void
21 mm_store_si128(char *dest
, unsigned idx
, __m128i src
)
23 _mm_store_si128((__m128i
*)dest
+ idx
, src
);
26 static force_inline
void
27 memmove_mov4x64b(char *dest
, const char *src
, flush64b_fn flush64b
)
29 __m128i xmm0
= mm_loadu_si128(src
, 0);
30 __m128i xmm1
= mm_loadu_si128(src
, 1);
31 __m128i xmm2
= mm_loadu_si128(src
, 2);
32 __m128i xmm3
= mm_loadu_si128(src
, 3);
33 __m128i xmm4
= mm_loadu_si128(src
, 4);
34 __m128i xmm5
= mm_loadu_si128(src
, 5);
35 __m128i xmm6
= mm_loadu_si128(src
, 6);
36 __m128i xmm7
= mm_loadu_si128(src
, 7);
37 __m128i xmm8
= mm_loadu_si128(src
, 8);
38 __m128i xmm9
= mm_loadu_si128(src
, 9);
39 __m128i xmm10
= mm_loadu_si128(src
, 10);
40 __m128i xmm11
= mm_loadu_si128(src
, 11);
41 __m128i xmm12
= mm_loadu_si128(src
, 12);
42 __m128i xmm13
= mm_loadu_si128(src
, 13);
43 __m128i xmm14
= mm_loadu_si128(src
, 14);
44 __m128i xmm15
= mm_loadu_si128(src
, 15);
46 mm_store_si128(dest
, 0, xmm0
);
47 mm_store_si128(dest
, 1, xmm1
);
48 mm_store_si128(dest
, 2, xmm2
);
49 mm_store_si128(dest
, 3, xmm3
);
50 mm_store_si128(dest
, 4, xmm4
);
51 mm_store_si128(dest
, 5, xmm5
);
52 mm_store_si128(dest
, 6, xmm6
);
53 mm_store_si128(dest
, 7, xmm7
);
54 mm_store_si128(dest
, 8, xmm8
);
55 mm_store_si128(dest
, 9, xmm9
);
56 mm_store_si128(dest
, 10, xmm10
);
57 mm_store_si128(dest
, 11, xmm11
);
58 mm_store_si128(dest
, 12, xmm12
);
59 mm_store_si128(dest
, 13, xmm13
);
60 mm_store_si128(dest
, 14, xmm14
);
61 mm_store_si128(dest
, 15, xmm15
);
63 flush64b(dest
+ 0 * 64);
64 flush64b(dest
+ 1 * 64);
65 flush64b(dest
+ 2 * 64);
66 flush64b(dest
+ 3 * 64);
69 static force_inline
void
70 memmove_mov2x64b(char *dest
, const char *src
, flush64b_fn flush64b
)
72 __m128i xmm0
= mm_loadu_si128(src
, 0);
73 __m128i xmm1
= mm_loadu_si128(src
, 1);
74 __m128i xmm2
= mm_loadu_si128(src
, 2);
75 __m128i xmm3
= mm_loadu_si128(src
, 3);
76 __m128i xmm4
= mm_loadu_si128(src
, 4);
77 __m128i xmm5
= mm_loadu_si128(src
, 5);
78 __m128i xmm6
= mm_loadu_si128(src
, 6);
79 __m128i xmm7
= mm_loadu_si128(src
, 7);
81 mm_store_si128(dest
, 0, xmm0
);
82 mm_store_si128(dest
, 1, xmm1
);
83 mm_store_si128(dest
, 2, xmm2
);
84 mm_store_si128(dest
, 3, xmm3
);
85 mm_store_si128(dest
, 4, xmm4
);
86 mm_store_si128(dest
, 5, xmm5
);
87 mm_store_si128(dest
, 6, xmm6
);
88 mm_store_si128(dest
, 7, xmm7
);
90 flush64b(dest
+ 0 * 64);
91 flush64b(dest
+ 1 * 64);
94 static force_inline
void
95 memmove_mov1x64b(char *dest
, const char *src
, flush64b_fn flush64b
)
97 __m128i xmm0
= mm_loadu_si128(src
, 0);
98 __m128i xmm1
= mm_loadu_si128(src
, 1);
99 __m128i xmm2
= mm_loadu_si128(src
, 2);
100 __m128i xmm3
= mm_loadu_si128(src
, 3);
102 mm_store_si128(dest
, 0, xmm0
);
103 mm_store_si128(dest
, 1, xmm1
);
104 mm_store_si128(dest
, 2, xmm2
);
105 mm_store_si128(dest
, 3, xmm3
);
107 flush64b(dest
+ 0 * 64);
/*
 * memmove_mov_sse_fw -- copy len bytes from src to dest; going by the _fw
 * suffix this is presumably the forward (low-to-high address) variant.
 *
 * Visible steps: cnt is taken from the low six bits of dest,
 * memmove_small_sse2() is called with cnt, then 4 * 64, 2 * 64 and 64 byte
 * chunks go through the memmove_mov{4,2,1}x64b helpers, and a final
 * memmove_small_sse2() call receives whatever len is left.
 *
 * NOTE(review): the listing numbers embedded in this extract jump
 * (114 -> 121 -> 128 -> 129 -> 136 -> 143 -> 151), so the statements in
 * between -- presumably the guards around each call and the pointer/len
 * updates -- are not visible here. Restore them from the upstream file
 * before relying on this text.
 */
110 static force_inline
void
111 memmove_mov_sse_fw(char *dest
, const char *src
, size_t len
,
112 flush_fn flush
, flush64b_fn flush64b
)
/* offset of dest within a 64-byte unit */
114 size_t cnt
= (uint64_t)dest
& 63;
121 memmove_small_sse2(dest
, src
, cnt
, flush
);
/* bulk part: 256 bytes per iteration while at least that much remains */
128 while (len
>= 4 * 64) {
129 memmove_mov4x64b(dest
, src
, flush64b
);
136 memmove_mov2x64b(dest
, src
, flush64b
);
143 memmove_mov1x64b(dest
, src
, flush64b
);
/* remaining bytes are handed to the small-copy helper */
151 memmove_small_sse2(dest
, src
, len
, flush
);
/*
 * memmove_mov_sse_bw -- copy len bytes from src to dest; going by the _bw
 * suffix this is presumably the backward (high-to-low address) variant.
 *
 * Visible steps: cnt is taken from the low six bits of dest,
 * memmove_small_sse2() is called with cnt, then 4 * 64, 2 * 64 and 64 byte
 * chunks go through the memmove_mov{4,2,1}x64b helpers, and the final
 * memmove_small_sse2() call is made on dest - len / src - len.
 *
 * NOTE(review): the listing numbers embedded in this extract jump
 * (156 -> 161 -> 169 -> 172 -> 176 -> 183 -> 190 -> 194), so the statements
 * in between -- presumably the initial pointer adjustment, the guards and
 * the pointer/len updates -- are not visible here. Restore them from the
 * upstream file before relying on this text.
 */
154 static force_inline
void
155 memmove_mov_sse_bw(char *dest
, const char *src
, size_t len
,
156 flush_fn flush
, flush64b_fn flush64b
)
/* offset of dest within a 64-byte unit */
161 size_t cnt
= (uint64_t)dest
& 63;
169 memmove_small_sse2(dest
, src
, cnt
, flush
);
/* bulk part: 256 bytes per iteration while at least that much remains */
172 while (len
>= 4 * 64) {
176 memmove_mov4x64b(dest
, src
, flush64b
);
183 memmove_mov2x64b(dest
, src
, flush64b
);
190 memmove_mov1x64b(dest
, src
, flush64b
);
/* the last len bytes sit just below the current dest/src */
194 memmove_small_sse2(dest
- len
, src
- len
, len
, flush
);
/*
 * memmove_mov_sse2 -- pick a copy routine from the relative position of
 * dest and src and pass all arguments through to it.
 *
 * The test subtracts the addresses as unsigned integers: when dest is below
 * src the difference wraps to a large value, and when dest is at least len
 * bytes above src it is >= len as well; in both cases
 * memmove_mov_sse_fw() is used.
 *
 * NOTE(review): listing line 203, between the two calls, is absent from
 * this extract -- presumably the `else` that makes memmove_mov_sse_bw() the
 * alternative branch. Confirm against the upstream file.
 */
197 static force_inline
void
198 memmove_mov_sse2(char *dest
, const char *src
, size_t len
,
199 flush_fn flush
, flush64b_fn flush64b
)
201 if ((uintptr_t)dest
- (uintptr_t)src
>= len
)
202 memmove_mov_sse_fw(dest
, src
, len
, flush
, flush64b
);
204 memmove_mov_sse_bw(dest
, src
, len
, flush
, flush64b
);
/*
 * memmove_mov_sse2_noflush -- memmove_mov_sse2() with noflush and
 * noflush64b as the flush callbacks.
 *
 * Reports dest, src and len through LOG() (first argument 15, presumably
 * the log level) before delegating.
 *
 * NOTE(review): the return-type line (listing line 207) and the braces are
 * absent from this extract.
 */
208 memmove_mov_sse2_noflush(char *dest
, const char *src
, size_t len
)
210 LOG(15, "dest %p src %p len %zu", dest
, src
, len
);
212 memmove_mov_sse2(dest
, src
, len
, noflush
, noflush64b
);
/*
 * memmove_mov_sse2_empty -- memmove_mov_sse2() with flush_empty_nolog and
 * flush64b_empty as the flush callbacks.
 *
 * Reports dest, src and len through LOG() (first argument 15, presumably
 * the log level) before delegating.
 *
 * NOTE(review): the return-type line (listing line 215) and the braces are
 * absent from this extract.
 */
216 memmove_mov_sse2_empty(char *dest
, const char *src
, size_t len
)
218 LOG(15, "dest %p src %p len %zu", dest
, src
, len
);
220 memmove_mov_sse2(dest
, src
, len
, flush_empty_nolog
, flush64b_empty
);
/*
 * memmove_mov_sse2_clflush -- memmove_mov_sse2() with flush_clflush_nolog
 * and pmem_clflush as the flush callbacks.
 *
 * Reports dest, src and len through LOG() (first argument 15, presumably
 * the log level) before delegating.
 *
 * NOTE(review): the return-type line (listing line 223) and the braces are
 * absent from this extract.
 */
224 memmove_mov_sse2_clflush(char *dest
, const char *src
, size_t len
)
226 LOG(15, "dest %p src %p len %zu", dest
, src
, len
);
228 memmove_mov_sse2(dest
, src
, len
, flush_clflush_nolog
, pmem_clflush
);
/*
 * memmove_mov_sse2_clflushopt -- memmove_mov_sse2() with
 * flush_clflushopt_nolog as the flush callback.
 *
 * Reports dest, src and len through LOG() (first argument 15, presumably
 * the log level) before delegating.
 *
 * NOTE(review): this extract cuts the call off after its fourth argument;
 * the flush64b argument on the following listing line is not visible
 * (the sibling wrappers pass pmem_clflush / pmem_clwb in that position).
 * The return-type line (listing line 231) and the braces are absent too.
 */
232 memmove_mov_sse2_clflushopt(char *dest
, const char *src
, size_t len
)
234 LOG(15, "dest %p src %p len %zu", dest
, src
, len
);
236 memmove_mov_sse2(dest
, src
, len
, flush_clflushopt_nolog
,
/*
 * memmove_mov_sse2_clwb -- memmove_mov_sse2() with flush_clwb_nolog and
 * pmem_clwb as the flush callbacks.
 *
 * Reports dest, src and len through LOG() (first argument 15, presumably
 * the log level) before delegating.
 *
 * NOTE(review): the return-type line (listing line 240) and the braces are
 * absent from this extract.
 */
241 memmove_mov_sse2_clwb(char *dest
, const char *src
, size_t len
)
243 LOG(15, "dest %p src %p len %zu", dest
, src
, len
);
245 memmove_mov_sse2(dest
, src
, len
, flush_clwb_nolog
, pmem_clwb
);