/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2018 Intel Corporation
 */

/*
 * This is a simple functional test for the rte_smp_mb() implementation,
 * i.e. it makes sure that LOAD and STORE operations that precede the
 * rte_smp_mb() call are globally visible across the lcores
 * before the LOAD and STORE operations that follow it.
 * The test uses a simple implementation of Peterson's lock algorithm
 * (https://en.wikipedia.org/wiki/Peterson%27s_algorithm)
 * for two execution units to make sure that rte_smp_mb() prevents
 * store-load reordering.
 * When executed on a single lcore, it can also be used as an approximate
 * estimate of the number of cycles a particular implementation of
 * rte_smp_mb() will take.
 */
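
/*
 * Why a store-load barrier is needed here: without it, a CPU may hoist
 * the load of the other side's flag above its own flag store, so both
 * contenders can observe a stale 0:
 *
 *   lcore A                      lcore B
 *   flag[0] = 1                  flag[1] = 1
 *   read flag[1] -> 0 (stale)    read flag[0] -> 0 (stale)
 *
 * Both then believe the lock is free and enter the critical section
 * together; the counter checks in the validation phase catch this.
 */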

#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <stdlib.h>
#include <errno.h>
#include <inttypes.h>

#include <rte_memory.h>
#include <rte_per_lcore.h>
#include <rte_launch.h>
#include <rte_atomic.h>
#include <rte_eal.h>
#include <rte_lcore.h>
#include <rte_pause.h>
#include <rte_random.h>
#include <rte_cycles.h>
#include <rte_vect.h>
#include <rte_debug.h>

#include "test.h"

#define ADD_MAX 8
#define ITER_MAX 0x1000000

enum plock_use_type {
	USE_MB,
	USE_SMP_MB,
	USE_NUM
};

struct plock {
	volatile uint32_t flag[2];
	volatile uint32_t victim;
	enum plock_use_type utype;
};

/*
 * A lock plus the two counters protected by it.
 */
struct plock_test {
	struct plock lock;
	uint32_t val;
	uint32_t iter;
};

/*
 * Each active lcore shares a plock_test struct with its left and right
 * neighbours.
 */
struct lcore_plock_test {
	struct plock_test *pt[2]; /* shared, lock-protected data */
	uint32_t sum[2]; /* local copy of the shared data */
	uint32_t iter; /* number of iterations to perform */
	uint32_t lc; /* given lcore id */
};

static inline void
store_load_barrier(uint32_t utype)
{
	if (utype == USE_MB)
		rte_mb();
	else if (utype == USE_SMP_MB)
		rte_smp_mb();
	else
		RTE_VERIFY(0);
}

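/*
 * rte_mb() is a full memory barrier, while rte_smp_mb() only has to
 * order accesses between lcores and may therefore be implemented more
 * cheaply on some architectures; comparing the two flavours is the
 * point of the USE_MB/USE_SMP_MB split above.
 */
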
/*
 * Peterson lock implementation for two contenders.
 */
static void
plock_lock(struct plock *l, uint32_t self)
{
	uint32_t other;

	other = self ^ 1;

	/* announce interest and volunteer as the victim */
	l->flag[self] = 1;
	l->victim = self;

	/*
	 * Make the stores above globally visible before the loads
	 * below - the store-load ordering under test.
	 */
	store_load_barrier(l->utype);

	/* wait while the other contender holds or wins the tie */
	while (l->flag[other] == 1 && l->victim == self)
		rte_pause();
}

static void
plock_unlock(struct plock *l, uint32_t self)
{
	/* make the protected updates visible before releasing the lock */
	rte_smp_wmb();
	l->flag[self] = 0;
}

static void
plock_reset(struct plock *l, enum plock_use_type utype)
{
	memset(l, 0, sizeof(*l));
	l->utype = utype;
}

/*
 * grab the lock, update both counters, release the lock.
 */
static void
plock_add(struct plock_test *pt, uint32_t self, uint32_t n)
{
	plock_lock(&pt->lock, self);
	pt->iter++;
	pt->val += n;
	plock_unlock(&pt->lock, self);
}

static int
plock_test1_lcore(void *data)
{
	uint64_t tm;
	uint32_t i, lc, ln, n;
	struct lcore_plock_test *lpt;

	lpt = data;
	lc = rte_lcore_id();

	/* find the lcore_plock_test struct for the given lcore */
	for (ln = rte_lcore_count(); ln != 0 && lpt->lc != lc; lpt++, ln--)
		;

	if (ln == 0) {
		printf("%s(%u) error at init\n", __func__, lc);
		return -1;
	}

	n = rte_rand() % ADD_MAX;
	tm = rte_get_timer_cycles();

	/*
	 * for each iteration:
	 * - update the shared, lock-protected data in a safe manner
	 * - update the local copy of the shared data
	 */
	for (i = 0; i != lpt->iter; i++) {

		plock_add(lpt->pt[0], 0, n);
		plock_add(lpt->pt[1], 1, n);

		lpt->sum[0] += n;
		lpt->sum[1] += n;

		n = (n + 1) % ADD_MAX;
	}

	tm = rte_get_timer_cycles() - tm;

	printf("%s(%u): %u iterations finished, in %" PRIu64
		" cycles, %#Lf cycles/iteration, "
		"local sum={%u, %u}\n",
		__func__, lc, i, tm, (long double)tm / i,
		lpt->sum[0], lpt->sum[1]);
	return 0;
}

/*
 * For N active lcores we allocate N+1 plock_test structures.
 * Each active lcore shares one plock_test structure with its
 * left lcore neighbour and one plock_test structure with its
 * right lcore neighbour.
 * During the test each lcore updates data in both shared structures and
 * in its local copies. Then, in the validation phase, we check that the
 * shared and local data match.
 */
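/*
 * For example, with 3 active lcores (indices 0..2 in lpt[]):
 *
 *   lpt[0] updates pt[0] and pt[1]
 *   lpt[1] updates pt[1] and pt[2]
 *   lpt[2] updates pt[2] and pt[0]   (the last lcore wraps around)
 *
 * so each contended plock_test is used by exactly two lcores, with
 * distinct Peterson ids 0 and 1 - the two-contender case the lock
 * supports.
 */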
static int
plock_test(uint32_t iter, enum plock_use_type utype)
{
	int32_t rc;
	uint32_t i, lc, n;
	uint32_t *sum;
	struct plock_test *pt;
	struct lcore_plock_test *lpt;

	/* init phase, allocate and initialize shared data */

	n = rte_lcore_count();
	pt = calloc(n + 1, sizeof(*pt));
	lpt = calloc(n, sizeof(*lpt));
	sum = calloc(n + 1, sizeof(*sum));

	printf("%s(iter=%u, utype=%u) started on %u lcores\n",
		__func__, iter, utype, n);

	if (pt == NULL || lpt == NULL || sum == NULL) {
		printf("%s: failed to allocate memory for %u lcores\n",
			__func__, n);
		free(pt);
		free(lpt);
		free(sum);
		return -ENOMEM;
	}

	for (i = 0; i != n + 1; i++)
		plock_reset(&pt[i].lock, utype);

	i = 0;
	RTE_LCORE_FOREACH(lc) {

		lpt[i].lc = lc;
		lpt[i].iter = iter;
		lpt[i].pt[0] = pt + i;
		lpt[i].pt[1] = pt + i + 1;
		i++;
	}

	/* close the ring: the last lcore shares pt[0] with the first one */
	lpt[i - 1].pt[1] = pt;

	for (i = 0; i != n; i++)
		printf("lpt[%u]={lc=%u, pt={%p, %p},};\n",
			i, lpt[i].lc, lpt[i].pt[0], lpt[i].pt[1]);

	/* test phase - start and wait for completion on each active lcore */

	rte_eal_mp_remote_launch(plock_test1_lcore, lpt, CALL_MASTER);
	rte_eal_mp_wait_lcore();

	/* validation phase - make sure that shared and local data match */

	for (i = 0; i != n; i++) {
		sum[i] += lpt[i].sum[0];
		sum[i + 1] += lpt[i].sum[1];
	}

	/* fold the wrapped-around element (sum[n]) back into sum[0] */
	sum[0] += sum[i];

	rc = 0;
	for (i = 0; i != n; i++) {
		printf("%s: sum[%u]=%u, pt[%u].val=%u, pt[%u].iter=%u;\n",
			__func__, i, sum[i], i, pt[i].val, i, pt[i].iter);

		/* a race condition occurred, the lock doesn't work properly */
		if (sum[i] != pt[i].val || 2 * iter != pt[i].iter) {
			printf("error: local and shared sums don't match\n");
			rc = -1;
		}
	}

	free(pt);
	free(lpt);
	free(sum);

	printf("%s(utype=%u) returns %d\n", __func__, utype, rc);
	return rc;
}

static int
test_barrier(void)
{
	int32_t i, ret, rc[USE_NUM];

	for (i = 0; i != RTE_DIM(rc); i++)
		rc[i] = plock_test(ITER_MAX, i);

	ret = 0;
	for (i = 0; i != RTE_DIM(rc); i++) {
		printf("%s for utype=%d %s\n",
			__func__, i, rc[i] == 0 ? "passed" : "failed");
		ret |= rc[i];
	}

	return ret;
}

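/*
 * Registered below as "barrier_autotest"; it can be invoked from the
 * interactive prompt of the DPDK test application (the exact invocation
 * depends on the build/target).
 */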
REGISTER_TEST_COMMAND(barrier_autotest, test_barrier);