git.proxmox.com Git - ceph.git/blob - ceph/src/pmdk/src/libpmem2/x86_64/memset/memset_t_sse2.c
import ceph 16.2.7
[ceph.git] / ceph / src / pmdk / src / libpmem2 / x86_64 / memset / memset_t_sse2.c
1 // SPDX-License-Identifier: BSD-3-Clause
2 /* Copyright 2017-2020, Intel Corporation */
3
4 #include <immintrin.h>
5 #include <stddef.h>
6 #include <stdint.h>
7
8 #include "pmem2_arch.h"
9 #include "flush.h"
10 #include "memcpy_memset.h"
11 #include "memset_sse2.h"
12
13 static force_inline void
14 mm_store_si128(char *dest, unsigned idx, __m128i src)
15 {
16 _mm_store_si128((__m128i *)dest + idx, src);
17 }
18
19 static force_inline void
20 memset_mov4x64b(char *dest, __m128i xmm, flush64b_fn flush64b)
21 {
22 mm_store_si128(dest, 0, xmm);
23 mm_store_si128(dest, 1, xmm);
24 mm_store_si128(dest, 2, xmm);
25 mm_store_si128(dest, 3, xmm);
26 mm_store_si128(dest, 4, xmm);
27 mm_store_si128(dest, 5, xmm);
28 mm_store_si128(dest, 6, xmm);
29 mm_store_si128(dest, 7, xmm);
30 mm_store_si128(dest, 8, xmm);
31 mm_store_si128(dest, 9, xmm);
32 mm_store_si128(dest, 10, xmm);
33 mm_store_si128(dest, 11, xmm);
34 mm_store_si128(dest, 12, xmm);
35 mm_store_si128(dest, 13, xmm);
36 mm_store_si128(dest, 14, xmm);
37 mm_store_si128(dest, 15, xmm);
38
39 flush64b(dest + 0 * 64);
40 flush64b(dest + 1 * 64);
41 flush64b(dest + 2 * 64);
42 flush64b(dest + 3 * 64);
43 }
44
45 static force_inline void
46 memset_mov2x64b(char *dest, __m128i xmm, flush64b_fn flush64b)
47 {
48 mm_store_si128(dest, 0, xmm);
49 mm_store_si128(dest, 1, xmm);
50 mm_store_si128(dest, 2, xmm);
51 mm_store_si128(dest, 3, xmm);
52 mm_store_si128(dest, 4, xmm);
53 mm_store_si128(dest, 5, xmm);
54 mm_store_si128(dest, 6, xmm);
55 mm_store_si128(dest, 7, xmm);
56
57 flush64b(dest + 0 * 64);
58 flush64b(dest + 1 * 64);
59 }
60
61 static force_inline void
62 memset_mov1x64b(char *dest, __m128i xmm, flush64b_fn flush64b)
63 {
64 mm_store_si128(dest, 0, xmm);
65 mm_store_si128(dest, 1, xmm);
66 mm_store_si128(dest, 2, xmm);
67 mm_store_si128(dest, 3, xmm);
68
69 flush64b(dest + 0 * 64);
70 }
71
72 static force_inline void
73 memset_mov_sse2(char *dest, int c, size_t len,
74 flush_fn flush, flush64b_fn flush64b)
75 {
76 __m128i xmm = _mm_set1_epi8((char)c);
77
78 size_t cnt = (uint64_t)dest & 63;
79 if (cnt > 0) {
80 cnt = 64 - cnt;
81
82 if (cnt > len)
83 cnt = len;
84
85 memset_small_sse2(dest, xmm, cnt, flush);
86
87 dest += cnt;
88 len -= cnt;
89 }
90
91 while (len >= 4 * 64) {
92 memset_mov4x64b(dest, xmm, flush64b);
93 dest += 4 * 64;
94 len -= 4 * 64;
95 }
96
97 if (len >= 2 * 64) {
98 memset_mov2x64b(dest, xmm, flush64b);
99 dest += 2 * 64;
100 len -= 2 * 64;
101 }
102
103 if (len >= 1 * 64) {
104 memset_mov1x64b(dest, xmm, flush64b);
105
106 dest += 1 * 64;
107 len -= 1 * 64;
108 }
109
110 if (len)
111 memset_small_sse2(dest, xmm, len, flush);
112 }
113
114 void
115 memset_mov_sse2_noflush(char *dest, int c, size_t len)
116 {
117 LOG(15, "dest %p c %d len %zu", dest, c, len);
118
119 memset_mov_sse2(dest, c, len, noflush, noflush64b);
120 }
121
122 void
123 memset_mov_sse2_empty(char *dest, int c, size_t len)
124 {
125 LOG(15, "dest %p c %d len %zu", dest, c, len);
126
127 memset_mov_sse2(dest, c, len, flush_empty_nolog, flush64b_empty);
128 }
129
130 void
131 memset_mov_sse2_clflush(char *dest, int c, size_t len)
132 {
133 LOG(15, "dest %p c %d len %zu", dest, c, len);
134
135 memset_mov_sse2(dest, c, len, flush_clflush_nolog, pmem_clflush);
136 }
137
138 void
139 memset_mov_sse2_clflushopt(char *dest, int c, size_t len)
140 {
141 LOG(15, "dest %p c %d len %zu", dest, c, len);
142
143 memset_mov_sse2(dest, c, len, flush_clflushopt_nolog,
144 pmem_clflushopt);
145 }
146
147 void
148 memset_mov_sse2_clwb(char *dest, int c, size_t len)
149 {
150 LOG(15, "dest %p c %d len %zu", dest, c, len);
151
152 memset_mov_sse2(dest, c, len, flush_clwb_nolog, pmem_clwb);
153 }