]> git.proxmox.com Git - ceph.git/blob - ceph/src/spdk/dpdk/lib/librte_eal/common/eal_common_fbarray.c
import 15.2.0 Octopus source
[ceph.git] / ceph / src / spdk / dpdk / lib / librte_eal / common / eal_common_fbarray.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2017-2018 Intel Corporation
3 */
4
5 #include <fcntl.h>
6 #include <inttypes.h>
7 #include <limits.h>
8 #include <sys/mman.h>
9 #include <stdint.h>
10 #include <errno.h>
11 #include <sys/file.h>
12 #include <string.h>
13
14 #include <rte_common.h>
15 #include <rte_log.h>
16 #include <rte_errno.h>
17 #include <rte_spinlock.h>
18 #include <rte_tailq.h>
19
20 #include "eal_filesystem.h"
21 #include "eal_private.h"
22
23 #include "rte_fbarray.h"
24
/*
 * Helpers for the used-mask bitmap. Each 64-bit mask word tracks
 * MASK_ALIGN (1 << MASK_SHIFT == 64) consecutive array elements.
 */
#define MASK_SHIFT 6ULL
#define MASK_ALIGN (1ULL << MASK_SHIFT)
/* index of the mask word that holds element x */
#define MASK_LEN_TO_IDX(x) ((x) >> MASK_SHIFT)
/* bit position of element x inside its mask word */
#define MASK_LEN_TO_MOD(x) ((x) - RTE_ALIGN_FLOOR(x, MASK_ALIGN))
/*
 * element index for bit 'mod' of mask word 'idx'; arguments are
 * parenthesized so that compound expressions expand correctly
 */
#define MASK_GET_IDX(idx, mod) (((idx) << MASK_SHIFT) + (mod))
30
31 /*
32 * We use this to keep track of created/attached memory areas to prevent user
33 * errors in API usage.
34 */
35 struct mem_area {
36 TAILQ_ENTRY(mem_area) next;
37 void *addr;
38 size_t len;
39 int fd;
40 };
41 TAILQ_HEAD(mem_area_head, mem_area);
42 /* local per-process tailq */
43 static struct mem_area_head mem_area_tailq =
44 TAILQ_HEAD_INITIALIZER(mem_area_tailq);
45 static rte_spinlock_t mem_area_lock = RTE_SPINLOCK_INITIALIZER;
46
47 /*
48 * This is a mask that is always stored at the end of array, to provide fast
49 * way of finding free/used spots without looping through each element.
50 */
51
/*
 * Header of the used/free bitmap: a word count followed by the 64-bit mask
 * words themselves.
 */
struct used_mask {
	unsigned n_masks;	/* number of 64-bit words in data[] */
	uint64_t data[];	/* one bit per element; a set bit means "used" */
};
56
57 static size_t
58 calc_mask_size(unsigned int len)
59 {
60 /* mask must be multiple of MASK_ALIGN, even though length of array
61 * itself may not be aligned on that boundary.
62 */
63 len = RTE_ALIGN_CEIL(len, MASK_ALIGN);
64 return sizeof(struct used_mask) +
65 sizeof(uint64_t) * MASK_LEN_TO_IDX(len);
66 }
67
/*
 * Total size of the mapping for an array: element storage followed by the
 * used-mask, rounded up to a multiple of page_sz.
 *
 * page_sz: page size to align the result to
 * elt_sz:  size of one element, in bytes
 * len:     number of elements
 */
static size_t
calc_data_size(size_t page_sz, unsigned int elt_sz, unsigned int len)
{
	/*
	 * widen before multiplying: a product computed in unsigned int would
	 * silently wrap for large arrays.
	 */
	size_t data_sz = (size_t)elt_sz * len;
	size_t msk_sz = calc_mask_size(len);

	return RTE_ALIGN_CEIL(data_sz + msk_sz, page_sz);
}
75
/*
 * Locate the used-mask, which is stored right after the elt_sz * len bytes
 * of element storage that start at 'data'.
 */
static struct used_mask *
get_used_mask(void *data, unsigned int elt_sz, unsigned int len)
{
	/* compute the offset in size_t so it cannot wrap in unsigned int */
	const size_t mask_off = (size_t)elt_sz * len;

	return (struct used_mask *) RTE_PTR_ADD(data, mask_off);
}
81
82 static int
83 resize_and_map(int fd, void *addr, size_t len)
84 {
85 char path[PATH_MAX];
86 void *map_addr;
87
88 if (ftruncate(fd, len)) {
89 RTE_LOG(ERR, EAL, "Cannot truncate %s\n", path);
90 /* pass errno up the chain */
91 rte_errno = errno;
92 return -1;
93 }
94
95 map_addr = mmap(addr, len, PROT_READ | PROT_WRITE,
96 MAP_SHARED | MAP_FIXED, fd, 0);
97 if (map_addr != addr) {
98 RTE_LOG(ERR, EAL, "mmap() failed: %s\n", strerror(errno));
99 /* pass errno up the chain */
100 rte_errno = errno;
101 return -1;
102 }
103 return 0;
104 }
105
106 static int
107 overlap(const struct mem_area *ma, const void *start, size_t len)
108 {
109 const void *end = RTE_PTR_ADD(start, len);
110 const void *ma_start = ma->addr;
111 const void *ma_end = RTE_PTR_ADD(ma->addr, ma->len);
112
113 /* start overlap? */
114 if (start >= ma_start && start < ma_end)
115 return 1;
116 /* end overlap? */
117 if (end >= ma_start && end < ma_end)
118 return 1;
119 return 0;
120 }
121
122 static int
123 find_next_n(const struct rte_fbarray *arr, unsigned int start, unsigned int n,
124 bool used)
125 {
126 const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
127 arr->len);
128 unsigned int msk_idx, lookahead_idx, first, first_mod;
129 unsigned int last, last_mod;
130 uint64_t last_msk, ignore_msk;
131
132 /*
133 * mask only has granularity of MASK_ALIGN, but start may not be aligned
134 * on that boundary, so construct a special mask to exclude anything we
135 * don't want to see to avoid confusing ctz.
136 */
137 first = MASK_LEN_TO_IDX(start);
138 first_mod = MASK_LEN_TO_MOD(start);
139 ignore_msk = ~((1ULL << first_mod) - 1);
140
141 /* array length may not be aligned, so calculate ignore mask for last
142 * mask index.
143 */
144 last = MASK_LEN_TO_IDX(arr->len);
145 last_mod = MASK_LEN_TO_MOD(arr->len);
146 last_msk = ~(-1ULL << last_mod);
147
148 for (msk_idx = first; msk_idx < msk->n_masks; msk_idx++) {
149 uint64_t cur_msk, lookahead_msk;
150 unsigned int run_start, clz, left;
151 bool found = false;
152 /*
153 * The process of getting n consecutive bits for arbitrary n is
154 * a bit involved, but here it is in a nutshell:
155 *
156 * 1. let n be the number of consecutive bits we're looking for
157 * 2. check if n can fit in one mask, and if so, do n-1
158 * rshift-ands to see if there is an appropriate run inside
159 * our current mask
160 * 2a. if we found a run, bail out early
161 * 2b. if we didn't find a run, proceed
162 * 3. invert the mask and count leading zeroes (that is, count
163 * how many consecutive set bits we had starting from the
164 * end of current mask) as k
165 * 3a. if k is 0, continue to next mask
166 * 3b. if k is not 0, we have a potential run
167 * 4. to satisfy our requirements, next mask must have n-k
168 * consecutive set bits right at the start, so we will do
169 * (n-k-1) rshift-ands and check if first bit is set.
170 *
171 * Step 4 will need to be repeated if (n-k) > MASK_ALIGN until
172 * we either run out of masks, lose the run, or find what we
173 * were looking for.
174 */
175 cur_msk = msk->data[msk_idx];
176 left = n;
177
178 /* if we're looking for free spaces, invert the mask */
179 if (!used)
180 cur_msk = ~cur_msk;
181
182 /* combine current ignore mask with last index ignore mask */
183 if (msk_idx == last)
184 ignore_msk |= last_msk;
185
186 /* if we have an ignore mask, ignore once */
187 if (ignore_msk) {
188 cur_msk &= ignore_msk;
189 ignore_msk = 0;
190 }
191
192 /* if n can fit in within a single mask, do a search */
193 if (n <= MASK_ALIGN) {
194 uint64_t tmp_msk = cur_msk;
195 unsigned int s_idx;
196 for (s_idx = 0; s_idx < n - 1; s_idx++)
197 tmp_msk &= tmp_msk >> 1ULL;
198 /* we found what we were looking for */
199 if (tmp_msk != 0) {
200 run_start = __builtin_ctzll(tmp_msk);
201 return MASK_GET_IDX(msk_idx, run_start);
202 }
203 }
204
205 /*
206 * we didn't find our run within the mask, or n > MASK_ALIGN,
207 * so we're going for plan B.
208 */
209
210 /* count leading zeroes on inverted mask */
211 if (~cur_msk == 0)
212 clz = sizeof(cur_msk) * 8;
213 else
214 clz = __builtin_clzll(~cur_msk);
215
216 /* if there aren't any runs at the end either, just continue */
217 if (clz == 0)
218 continue;
219
220 /* we have a partial run at the end, so try looking ahead */
221 run_start = MASK_ALIGN - clz;
222 left -= clz;
223
224 for (lookahead_idx = msk_idx + 1; lookahead_idx < msk->n_masks;
225 lookahead_idx++) {
226 unsigned int s_idx, need;
227 lookahead_msk = msk->data[lookahead_idx];
228
229 /* if we're looking for free space, invert the mask */
230 if (!used)
231 lookahead_msk = ~lookahead_msk;
232
233 /* figure out how many consecutive bits we need here */
234 need = RTE_MIN(left, MASK_ALIGN);
235
236 for (s_idx = 0; s_idx < need - 1; s_idx++)
237 lookahead_msk &= lookahead_msk >> 1ULL;
238
239 /* if first bit is not set, we've lost the run */
240 if ((lookahead_msk & 1) == 0) {
241 /*
242 * we've scanned this far, so we know there are
243 * no runs in the space we've lookahead-scanned
244 * as well, so skip that on next iteration.
245 */
246 ignore_msk = ~((1ULL << need) - 1);
247 msk_idx = lookahead_idx;
248 break;
249 }
250
251 left -= need;
252
253 /* check if we've found what we were looking for */
254 if (left == 0) {
255 found = true;
256 break;
257 }
258 }
259
260 /* we didn't find anything, so continue */
261 if (!found)
262 continue;
263
264 return MASK_GET_IDX(msk_idx, run_start);
265 }
266 /* we didn't find anything */
267 rte_errno = used ? ENOENT : ENOSPC;
268 return -1;
269 }
270
271 static int
272 find_next(const struct rte_fbarray *arr, unsigned int start, bool used)
273 {
274 const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
275 arr->len);
276 unsigned int idx, first, first_mod;
277 unsigned int last, last_mod;
278 uint64_t last_msk, ignore_msk;
279
280 /*
281 * mask only has granularity of MASK_ALIGN, but start may not be aligned
282 * on that boundary, so construct a special mask to exclude anything we
283 * don't want to see to avoid confusing ctz.
284 */
285 first = MASK_LEN_TO_IDX(start);
286 first_mod = MASK_LEN_TO_MOD(start);
287 ignore_msk = ~((1ULL << first_mod) - 1ULL);
288
289 /* array length may not be aligned, so calculate ignore mask for last
290 * mask index.
291 */
292 last = MASK_LEN_TO_IDX(arr->len);
293 last_mod = MASK_LEN_TO_MOD(arr->len);
294 last_msk = ~(-(1ULL) << last_mod);
295
296 for (idx = first; idx < msk->n_masks; idx++) {
297 uint64_t cur = msk->data[idx];
298 int found;
299
300 /* if we're looking for free entries, invert mask */
301 if (!used)
302 cur = ~cur;
303
304 if (idx == last)
305 cur &= last_msk;
306
307 /* ignore everything before start on first iteration */
308 if (idx == first)
309 cur &= ignore_msk;
310
311 /* check if we have any entries */
312 if (cur == 0)
313 continue;
314
315 /*
316 * find first set bit - that will correspond to whatever it is
317 * that we're looking for.
318 */
319 found = __builtin_ctzll(cur);
320 return MASK_GET_IDX(idx, found);
321 }
322 /* we didn't find anything */
323 rte_errno = used ? ENOENT : ENOSPC;
324 return -1;
325 }
326
327 static int
328 find_contig(const struct rte_fbarray *arr, unsigned int start, bool used)
329 {
330 const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
331 arr->len);
332 unsigned int idx, first, first_mod;
333 unsigned int last, last_mod;
334 uint64_t last_msk;
335 unsigned int need_len, result = 0;
336
337 /* array length may not be aligned, so calculate ignore mask for last
338 * mask index.
339 */
340 last = MASK_LEN_TO_IDX(arr->len);
341 last_mod = MASK_LEN_TO_MOD(arr->len);
342 last_msk = ~(-(1ULL) << last_mod);
343
344 first = MASK_LEN_TO_IDX(start);
345 first_mod = MASK_LEN_TO_MOD(start);
346 for (idx = first; idx < msk->n_masks; idx++, result += need_len) {
347 uint64_t cur = msk->data[idx];
348 unsigned int run_len;
349
350 need_len = MASK_ALIGN;
351
352 /* if we're looking for free entries, invert mask */
353 if (!used)
354 cur = ~cur;
355
356 /* if this is last mask, ignore everything after last bit */
357 if (idx == last)
358 cur &= last_msk;
359
360 /* ignore everything before start on first iteration */
361 if (idx == first) {
362 cur >>= first_mod;
363 /* at the start, we don't need the full mask len */
364 need_len -= first_mod;
365 }
366
367 /* we will be looking for zeroes, so invert the mask */
368 cur = ~cur;
369
370 /* if mask is zero, we have a complete run */
371 if (cur == 0)
372 continue;
373
374 /*
375 * see if current run ends before mask end.
376 */
377 run_len = __builtin_ctzll(cur);
378
379 /* add however many zeroes we've had in the last run and quit */
380 if (run_len < need_len) {
381 result += run_len;
382 break;
383 }
384 }
385 return result;
386 }
387
388 static int
389 find_prev_n(const struct rte_fbarray *arr, unsigned int start, unsigned int n,
390 bool used)
391 {
392 const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
393 arr->len);
394 unsigned int msk_idx, lookbehind_idx, first, first_mod;
395 uint64_t ignore_msk;
396
397 /*
398 * mask only has granularity of MASK_ALIGN, but start may not be aligned
399 * on that boundary, so construct a special mask to exclude anything we
400 * don't want to see to avoid confusing ctz.
401 */
402 first = MASK_LEN_TO_IDX(start);
403 first_mod = MASK_LEN_TO_MOD(start);
404 /* we're going backwards, so mask must start from the top */
405 ignore_msk = first_mod == MASK_ALIGN - 1 ?
406 -1ULL : /* prevent overflow */
407 ~(-1ULL << (first_mod + 1));
408
409 /* go backwards, include zero */
410 msk_idx = first;
411 do {
412 uint64_t cur_msk, lookbehind_msk;
413 unsigned int run_start, run_end, ctz, left;
414 bool found = false;
415 /*
416 * The process of getting n consecutive bits from the top for
417 * arbitrary n is a bit involved, but here it is in a nutshell:
418 *
419 * 1. let n be the number of consecutive bits we're looking for
420 * 2. check if n can fit in one mask, and if so, do n-1
421 * lshift-ands to see if there is an appropriate run inside
422 * our current mask
423 * 2a. if we found a run, bail out early
424 * 2b. if we didn't find a run, proceed
425 * 3. invert the mask and count trailing zeroes (that is, count
426 * how many consecutive set bits we had starting from the
427 * start of current mask) as k
428 * 3a. if k is 0, continue to next mask
429 * 3b. if k is not 0, we have a potential run
430 * 4. to satisfy our requirements, next mask must have n-k
431 * consecutive set bits at the end, so we will do (n-k-1)
432 * lshift-ands and check if last bit is set.
433 *
434 * Step 4 will need to be repeated if (n-k) > MASK_ALIGN until
435 * we either run out of masks, lose the run, or find what we
436 * were looking for.
437 */
438 cur_msk = msk->data[msk_idx];
439 left = n;
440
441 /* if we're looking for free spaces, invert the mask */
442 if (!used)
443 cur_msk = ~cur_msk;
444
445 /* if we have an ignore mask, ignore once */
446 if (ignore_msk) {
447 cur_msk &= ignore_msk;
448 ignore_msk = 0;
449 }
450
451 /* if n can fit in within a single mask, do a search */
452 if (n <= MASK_ALIGN) {
453 uint64_t tmp_msk = cur_msk;
454 unsigned int s_idx;
455 for (s_idx = 0; s_idx < n - 1; s_idx++)
456 tmp_msk &= tmp_msk << 1ULL;
457 /* we found what we were looking for */
458 if (tmp_msk != 0) {
459 /* clz will give us offset from end of mask, and
460 * we only get the end of our run, not start,
461 * so adjust result to point to where start
462 * would have been.
463 */
464 run_start = MASK_ALIGN -
465 __builtin_clzll(tmp_msk) - n;
466 return MASK_GET_IDX(msk_idx, run_start);
467 }
468 }
469
470 /*
471 * we didn't find our run within the mask, or n > MASK_ALIGN,
472 * so we're going for plan B.
473 */
474
475 /* count trailing zeroes on inverted mask */
476 if (~cur_msk == 0)
477 ctz = sizeof(cur_msk) * 8;
478 else
479 ctz = __builtin_ctzll(~cur_msk);
480
481 /* if there aren't any runs at the start either, just
482 * continue
483 */
484 if (ctz == 0)
485 continue;
486
487 /* we have a partial run at the start, so try looking behind */
488 run_end = MASK_GET_IDX(msk_idx, ctz);
489 left -= ctz;
490
491 /* go backwards, include zero */
492 lookbehind_idx = msk_idx - 1;
493
494 /* we can't lookbehind as we've run out of masks, so stop */
495 if (msk_idx == 0)
496 break;
497
498 do {
499 const uint64_t last_bit = 1ULL << (MASK_ALIGN - 1);
500 unsigned int s_idx, need;
501
502 lookbehind_msk = msk->data[lookbehind_idx];
503
504 /* if we're looking for free space, invert the mask */
505 if (!used)
506 lookbehind_msk = ~lookbehind_msk;
507
508 /* figure out how many consecutive bits we need here */
509 need = RTE_MIN(left, MASK_ALIGN);
510
511 for (s_idx = 0; s_idx < need - 1; s_idx++)
512 lookbehind_msk &= lookbehind_msk << 1ULL;
513
514 /* if last bit is not set, we've lost the run */
515 if ((lookbehind_msk & last_bit) == 0) {
516 /*
517 * we've scanned this far, so we know there are
518 * no runs in the space we've lookbehind-scanned
519 * as well, so skip that on next iteration.
520 */
521 ignore_msk = -1ULL << need;
522 msk_idx = lookbehind_idx;
523 break;
524 }
525
526 left -= need;
527
528 /* check if we've found what we were looking for */
529 if (left == 0) {
530 found = true;
531 break;
532 }
533 } while ((lookbehind_idx--) != 0); /* decrement after check to
534 * include zero
535 */
536
537 /* we didn't find anything, so continue */
538 if (!found)
539 continue;
540
541 /* we've found what we were looking for, but we only know where
542 * the run ended, so calculate start position.
543 */
544 return run_end - n;
545 } while (msk_idx-- != 0); /* decrement after check to include zero */
546 /* we didn't find anything */
547 rte_errno = used ? ENOENT : ENOSPC;
548 return -1;
549 }
550
551 static int
552 find_prev(const struct rte_fbarray *arr, unsigned int start, bool used)
553 {
554 const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
555 arr->len);
556 unsigned int idx, first, first_mod;
557 uint64_t ignore_msk;
558
559 /*
560 * mask only has granularity of MASK_ALIGN, but start may not be aligned
561 * on that boundary, so construct a special mask to exclude anything we
562 * don't want to see to avoid confusing clz.
563 */
564 first = MASK_LEN_TO_IDX(start);
565 first_mod = MASK_LEN_TO_MOD(start);
566 /* we're going backwards, so mask must start from the top */
567 ignore_msk = first_mod == MASK_ALIGN - 1 ?
568 -1ULL : /* prevent overflow */
569 ~(-1ULL << (first_mod + 1));
570
571 /* go backwards, include zero */
572 idx = first;
573 do {
574 uint64_t cur = msk->data[idx];
575 int found;
576
577 /* if we're looking for free entries, invert mask */
578 if (!used)
579 cur = ~cur;
580
581 /* ignore everything before start on first iteration */
582 if (idx == first)
583 cur &= ignore_msk;
584
585 /* check if we have any entries */
586 if (cur == 0)
587 continue;
588
589 /*
590 * find last set bit - that will correspond to whatever it is
591 * that we're looking for. we're counting trailing zeroes, thus
592 * the value we get is counted from end of mask, so calculate
593 * position from start of mask.
594 */
595 found = MASK_ALIGN - __builtin_clzll(cur) - 1;
596
597 return MASK_GET_IDX(idx, found);
598 } while (idx-- != 0); /* decrement after check to include zero*/
599
600 /* we didn't find anything */
601 rte_errno = used ? ENOENT : ENOSPC;
602 return -1;
603 }
604
605 static int
606 find_rev_contig(const struct rte_fbarray *arr, unsigned int start, bool used)
607 {
608 const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
609 arr->len);
610 unsigned int idx, first, first_mod;
611 unsigned int need_len, result = 0;
612
613 first = MASK_LEN_TO_IDX(start);
614 first_mod = MASK_LEN_TO_MOD(start);
615
616 /* go backwards, include zero */
617 idx = first;
618 do {
619 uint64_t cur = msk->data[idx];
620 unsigned int run_len;
621
622 need_len = MASK_ALIGN;
623
624 /* if we're looking for free entries, invert mask */
625 if (!used)
626 cur = ~cur;
627
628 /* ignore everything after start on first iteration */
629 if (idx == first) {
630 unsigned int end_len = MASK_ALIGN - first_mod - 1;
631 cur <<= end_len;
632 /* at the start, we don't need the full mask len */
633 need_len -= end_len;
634 }
635
636 /* we will be looking for zeroes, so invert the mask */
637 cur = ~cur;
638
639 /* if mask is zero, we have a complete run */
640 if (cur == 0)
641 goto endloop;
642
643 /*
644 * see where run ends, starting from the end.
645 */
646 run_len = __builtin_clzll(cur);
647
648 /* add however many zeroes we've had in the last run and quit */
649 if (run_len < need_len) {
650 result += run_len;
651 break;
652 }
653 endloop:
654 result += need_len;
655 } while (idx-- != 0); /* decrement after check to include zero */
656 return result;
657 }
658
659 static int
660 set_used(struct rte_fbarray *arr, unsigned int idx, bool used)
661 {
662 struct used_mask *msk;
663 uint64_t msk_bit = 1ULL << MASK_LEN_TO_MOD(idx);
664 unsigned int msk_idx = MASK_LEN_TO_IDX(idx);
665 bool already_used;
666 int ret = -1;
667
668 if (arr == NULL || idx >= arr->len) {
669 rte_errno = EINVAL;
670 return -1;
671 }
672 msk = get_used_mask(arr->data, arr->elt_sz, arr->len);
673 ret = 0;
674
675 /* prevent array from changing under us */
676 rte_rwlock_write_lock(&arr->rwlock);
677
678 already_used = (msk->data[msk_idx] & msk_bit) != 0;
679
680 /* nothing to be done */
681 if (used == already_used)
682 goto out;
683
684 if (used) {
685 msk->data[msk_idx] |= msk_bit;
686 arr->count++;
687 } else {
688 msk->data[msk_idx] &= ~msk_bit;
689 arr->count--;
690 }
691 out:
692 rte_rwlock_write_unlock(&arr->rwlock);
693
694 return ret;
695 }
696
697 static int
698 fully_validate(const char *name, unsigned int elt_sz, unsigned int len)
699 {
700 if (name == NULL || elt_sz == 0 || len == 0 || len > INT_MAX) {
701 rte_errno = EINVAL;
702 return -1;
703 }
704
705 if (strnlen(name, RTE_FBARRAY_NAME_LEN) == RTE_FBARRAY_NAME_LEN) {
706 rte_errno = ENAMETOOLONG;
707 return -1;
708 }
709 return 0;
710 }
711
712 int __rte_experimental
713 rte_fbarray_init(struct rte_fbarray *arr, const char *name, unsigned int len,
714 unsigned int elt_sz)
715 {
716 size_t page_sz, mmap_len;
717 char path[PATH_MAX];
718 struct used_mask *msk;
719 struct mem_area *ma = NULL;
720 void *data = NULL;
721 int fd = -1;
722
723 if (arr == NULL) {
724 rte_errno = EINVAL;
725 return -1;
726 }
727
728 if (fully_validate(name, elt_sz, len))
729 return -1;
730
731 /* allocate mem area before doing anything */
732 ma = malloc(sizeof(*ma));
733 if (ma == NULL) {
734 rte_errno = ENOMEM;
735 return -1;
736 }
737
738 page_sz = sysconf(_SC_PAGESIZE);
739 if (page_sz == (size_t)-1) {
740 free(ma);
741 return -1;
742 }
743
744 /* calculate our memory limits */
745 mmap_len = calc_data_size(page_sz, elt_sz, len);
746
747 data = eal_get_virtual_area(NULL, &mmap_len, page_sz, 0, 0);
748 if (data == NULL) {
749 free(ma);
750 return -1;
751 }
752
753 rte_spinlock_lock(&mem_area_lock);
754
755 fd = -1;
756
757 if (internal_config.no_shconf) {
758 /* remap virtual area as writable */
759 void *new_data = mmap(data, mmap_len, PROT_READ | PROT_WRITE,
760 MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, fd, 0);
761 if (new_data == MAP_FAILED) {
762 RTE_LOG(DEBUG, EAL, "%s(): couldn't remap anonymous memory: %s\n",
763 __func__, strerror(errno));
764 goto fail;
765 }
766 } else {
767 eal_get_fbarray_path(path, sizeof(path), name);
768
769 /*
770 * Each fbarray is unique to process namespace, i.e. the
771 * filename depends on process prefix. Try to take out a lock
772 * and see if we succeed. If we don't, someone else is using it
773 * already.
774 */
775 fd = open(path, O_CREAT | O_RDWR, 0600);
776 if (fd < 0) {
777 RTE_LOG(DEBUG, EAL, "%s(): couldn't open %s: %s\n",
778 __func__, path, strerror(errno));
779 rte_errno = errno;
780 goto fail;
781 } else if (flock(fd, LOCK_EX | LOCK_NB)) {
782 RTE_LOG(DEBUG, EAL, "%s(): couldn't lock %s: %s\n",
783 __func__, path, strerror(errno));
784 rte_errno = EBUSY;
785 goto fail;
786 }
787
788 /* take out a non-exclusive lock, so that other processes could
789 * still attach to it, but no other process could reinitialize
790 * it.
791 */
792 if (flock(fd, LOCK_SH | LOCK_NB)) {
793 rte_errno = errno;
794 goto fail;
795 }
796
797 if (resize_and_map(fd, data, mmap_len))
798 goto fail;
799 }
800 ma->addr = data;
801 ma->len = mmap_len;
802 ma->fd = fd;
803
804 /* do not close fd - keep it until detach/destroy */
805 TAILQ_INSERT_TAIL(&mem_area_tailq, ma, next);
806
807 /* initialize the data */
808 memset(data, 0, mmap_len);
809
810 /* populate data structure */
811 strlcpy(arr->name, name, sizeof(arr->name));
812 arr->data = data;
813 arr->len = len;
814 arr->elt_sz = elt_sz;
815 arr->count = 0;
816
817 msk = get_used_mask(data, elt_sz, len);
818 msk->n_masks = MASK_LEN_TO_IDX(RTE_ALIGN_CEIL(len, MASK_ALIGN));
819
820 rte_rwlock_init(&arr->rwlock);
821
822 rte_spinlock_unlock(&mem_area_lock);
823
824 return 0;
825 fail:
826 if (data)
827 munmap(data, mmap_len);
828 if (fd >= 0)
829 close(fd);
830 free(ma);
831
832 rte_spinlock_unlock(&mem_area_lock);
833 return -1;
834 }
835
836 int __rte_experimental
837 rte_fbarray_attach(struct rte_fbarray *arr)
838 {
839 struct mem_area *ma = NULL, *tmp = NULL;
840 size_t page_sz, mmap_len;
841 char path[PATH_MAX];
842 void *data = NULL;
843 int fd = -1;
844
845 if (arr == NULL) {
846 rte_errno = EINVAL;
847 return -1;
848 }
849
850 /*
851 * we don't need to synchronize attach as two values we need (element
852 * size and array length) are constant for the duration of life of
853 * the array, so the parts we care about will not race.
854 */
855
856 if (fully_validate(arr->name, arr->elt_sz, arr->len))
857 return -1;
858
859 ma = malloc(sizeof(*ma));
860 if (ma == NULL) {
861 rte_errno = ENOMEM;
862 return -1;
863 }
864
865 page_sz = sysconf(_SC_PAGESIZE);
866 if (page_sz == (size_t)-1) {
867 free(ma);
868 return -1;
869 }
870
871 mmap_len = calc_data_size(page_sz, arr->elt_sz, arr->len);
872
873 /* check the tailq - maybe user has already mapped this address space */
874 rte_spinlock_lock(&mem_area_lock);
875
876 TAILQ_FOREACH(tmp, &mem_area_tailq, next) {
877 if (overlap(tmp, arr->data, mmap_len)) {
878 rte_errno = EEXIST;
879 goto fail;
880 }
881 }
882
883 /* we know this memory area is unique, so proceed */
884
885 data = eal_get_virtual_area(arr->data, &mmap_len, page_sz, 0, 0);
886 if (data == NULL)
887 goto fail;
888
889 eal_get_fbarray_path(path, sizeof(path), arr->name);
890
891 fd = open(path, O_RDWR);
892 if (fd < 0) {
893 rte_errno = errno;
894 goto fail;
895 }
896
897 /* lock the file, to let others know we're using it */
898 if (flock(fd, LOCK_SH | LOCK_NB)) {
899 rte_errno = errno;
900 goto fail;
901 }
902
903 if (resize_and_map(fd, data, mmap_len))
904 goto fail;
905
906 /* store our new memory area */
907 ma->addr = data;
908 ma->fd = fd; /* keep fd until detach/destroy */
909 ma->len = mmap_len;
910
911 TAILQ_INSERT_TAIL(&mem_area_tailq, ma, next);
912
913 /* we're done */
914
915 rte_spinlock_unlock(&mem_area_lock);
916 return 0;
917 fail:
918 if (data)
919 munmap(data, mmap_len);
920 if (fd >= 0)
921 close(fd);
922 free(ma);
923 rte_spinlock_unlock(&mem_area_lock);
924 return -1;
925 }
926
927 int __rte_experimental
928 rte_fbarray_detach(struct rte_fbarray *arr)
929 {
930 struct mem_area *tmp = NULL;
931 size_t mmap_len;
932 int ret = -1;
933
934 if (arr == NULL) {
935 rte_errno = EINVAL;
936 return -1;
937 }
938
939 /*
940 * we don't need to synchronize detach as two values we need (element
941 * size and total capacity) are constant for the duration of life of
942 * the array, so the parts we care about will not race. if the user is
943 * detaching while doing something else in the same process, we can't
944 * really do anything about it, things will blow up either way.
945 */
946
947 size_t page_sz = sysconf(_SC_PAGESIZE);
948
949 if (page_sz == (size_t)-1)
950 return -1;
951
952 mmap_len = calc_data_size(page_sz, arr->elt_sz, arr->len);
953
954 /* does this area exist? */
955 rte_spinlock_lock(&mem_area_lock);
956
957 TAILQ_FOREACH(tmp, &mem_area_tailq, next) {
958 if (tmp->addr == arr->data && tmp->len == mmap_len)
959 break;
960 }
961 if (tmp == NULL) {
962 rte_errno = ENOENT;
963 ret = -1;
964 goto out;
965 }
966
967 munmap(arr->data, mmap_len);
968
969 /* area is unmapped, close fd and remove the tailq entry */
970 if (tmp->fd >= 0)
971 close(tmp->fd);
972 TAILQ_REMOVE(&mem_area_tailq, tmp, next);
973 free(tmp);
974
975 ret = 0;
976 out:
977 rte_spinlock_unlock(&mem_area_lock);
978 return ret;
979 }
980
981 int __rte_experimental
982 rte_fbarray_destroy(struct rte_fbarray *arr)
983 {
984 struct mem_area *tmp = NULL;
985 size_t mmap_len;
986 int fd, ret;
987 char path[PATH_MAX];
988
989 if (arr == NULL) {
990 rte_errno = EINVAL;
991 return -1;
992 }
993
994 /*
995 * we don't need to synchronize detach as two values we need (element
996 * size and total capacity) are constant for the duration of life of
997 * the array, so the parts we care about will not race. if the user is
998 * detaching while doing something else in the same process, we can't
999 * really do anything about it, things will blow up either way.
1000 */
1001
1002 size_t page_sz = sysconf(_SC_PAGESIZE);
1003
1004 if (page_sz == (size_t)-1)
1005 return -1;
1006
1007 mmap_len = calc_data_size(page_sz, arr->elt_sz, arr->len);
1008
1009 /* does this area exist? */
1010 rte_spinlock_lock(&mem_area_lock);
1011
1012 TAILQ_FOREACH(tmp, &mem_area_tailq, next) {
1013 if (tmp->addr == arr->data && tmp->len == mmap_len)
1014 break;
1015 }
1016 if (tmp == NULL) {
1017 rte_errno = ENOENT;
1018 ret = -1;
1019 goto out;
1020 }
1021 /* with no shconf, there were never any files to begin with */
1022 if (!internal_config.no_shconf) {
1023 /*
1024 * attempt to get an exclusive lock on the file, to ensure it
1025 * has been detached by all other processes
1026 */
1027 fd = tmp->fd;
1028 if (flock(fd, LOCK_EX | LOCK_NB)) {
1029 RTE_LOG(DEBUG, EAL, "Cannot destroy fbarray - another process is using it\n");
1030 rte_errno = EBUSY;
1031 ret = -1;
1032 goto out;
1033 }
1034
1035 /* we're OK to destroy the file */
1036 eal_get_fbarray_path(path, sizeof(path), arr->name);
1037 if (unlink(path)) {
1038 RTE_LOG(DEBUG, EAL, "Cannot unlink fbarray: %s\n",
1039 strerror(errno));
1040 rte_errno = errno;
1041 /*
1042 * we're still holding an exclusive lock, so drop it to
1043 * shared.
1044 */
1045 flock(fd, LOCK_SH | LOCK_NB);
1046
1047 ret = -1;
1048 goto out;
1049 }
1050 close(fd);
1051 }
1052 munmap(arr->data, mmap_len);
1053
1054 /* area is unmapped, remove the tailq entry */
1055 TAILQ_REMOVE(&mem_area_tailq, tmp, next);
1056 free(tmp);
1057 ret = 0;
1058 out:
1059 rte_spinlock_unlock(&mem_area_lock);
1060 return ret;
1061 }
1062
1063 void * __rte_experimental
1064 rte_fbarray_get(const struct rte_fbarray *arr, unsigned int idx)
1065 {
1066 void *ret = NULL;
1067 if (arr == NULL) {
1068 rte_errno = EINVAL;
1069 return NULL;
1070 }
1071
1072 if (idx >= arr->len) {
1073 rte_errno = EINVAL;
1074 return NULL;
1075 }
1076
1077 ret = RTE_PTR_ADD(arr->data, idx * arr->elt_sz);
1078
1079 return ret;
1080 }
1081
1082 int __rte_experimental
1083 rte_fbarray_set_used(struct rte_fbarray *arr, unsigned int idx)
1084 {
1085 return set_used(arr, idx, true);
1086 }
1087
1088 int __rte_experimental
1089 rte_fbarray_set_free(struct rte_fbarray *arr, unsigned int idx)
1090 {
1091 return set_used(arr, idx, false);
1092 }
1093
1094 int __rte_experimental
1095 rte_fbarray_is_used(struct rte_fbarray *arr, unsigned int idx)
1096 {
1097 struct used_mask *msk;
1098 int msk_idx;
1099 uint64_t msk_bit;
1100 int ret = -1;
1101
1102 if (arr == NULL || idx >= arr->len) {
1103 rte_errno = EINVAL;
1104 return -1;
1105 }
1106
1107 /* prevent array from changing under us */
1108 rte_rwlock_read_lock(&arr->rwlock);
1109
1110 msk = get_used_mask(arr->data, arr->elt_sz, arr->len);
1111 msk_idx = MASK_LEN_TO_IDX(idx);
1112 msk_bit = 1ULL << MASK_LEN_TO_MOD(idx);
1113
1114 ret = (msk->data[msk_idx] & msk_bit) != 0;
1115
1116 rte_rwlock_read_unlock(&arr->rwlock);
1117
1118 return ret;
1119 }
1120
1121 static int
1122 fbarray_find(struct rte_fbarray *arr, unsigned int start, bool next, bool used)
1123 {
1124 int ret = -1;
1125
1126 if (arr == NULL || start >= arr->len) {
1127 rte_errno = EINVAL;
1128 return -1;
1129 }
1130
1131 /* prevent array from changing under us */
1132 rte_rwlock_read_lock(&arr->rwlock);
1133
1134 /* cheap checks to prevent doing useless work */
1135 if (!used) {
1136 if (arr->len == arr->count) {
1137 rte_errno = ENOSPC;
1138 goto out;
1139 }
1140 if (arr->count == 0) {
1141 ret = start;
1142 goto out;
1143 }
1144 } else {
1145 if (arr->count == 0) {
1146 rte_errno = ENOENT;
1147 goto out;
1148 }
1149 if (arr->len == arr->count) {
1150 ret = start;
1151 goto out;
1152 }
1153 }
1154 if (next)
1155 ret = find_next(arr, start, used);
1156 else
1157 ret = find_prev(arr, start, used);
1158 out:
1159 rte_rwlock_read_unlock(&arr->rwlock);
1160 return ret;
1161 }
1162
1163 int __rte_experimental
1164 rte_fbarray_find_next_free(struct rte_fbarray *arr, unsigned int start)
1165 {
1166 return fbarray_find(arr, start, true, false);
1167 }
1168
1169 int __rte_experimental
1170 rte_fbarray_find_next_used(struct rte_fbarray *arr, unsigned int start)
1171 {
1172 return fbarray_find(arr, start, true, true);
1173 }
1174
1175 int __rte_experimental
1176 rte_fbarray_find_prev_free(struct rte_fbarray *arr, unsigned int start)
1177 {
1178 return fbarray_find(arr, start, false, false);
1179 }
1180
1181 int __rte_experimental
1182 rte_fbarray_find_prev_used(struct rte_fbarray *arr, unsigned int start)
1183 {
1184 return fbarray_find(arr, start, false, true);
1185 }
1186
1187 static int
1188 fbarray_find_n(struct rte_fbarray *arr, unsigned int start, unsigned int n,
1189 bool next, bool used)
1190 {
1191 int ret = -1;
1192
1193 if (arr == NULL || start >= arr->len || n > arr->len || n == 0) {
1194 rte_errno = EINVAL;
1195 return -1;
1196 }
1197 if (next && (arr->len - start) < n) {
1198 rte_errno = used ? ENOENT : ENOSPC;
1199 return -1;
1200 }
1201 if (!next && start < (n - 1)) {
1202 rte_errno = used ? ENOENT : ENOSPC;
1203 return -1;
1204 }
1205
1206 /* prevent array from changing under us */
1207 rte_rwlock_read_lock(&arr->rwlock);
1208
1209 /* cheap checks to prevent doing useless work */
1210 if (!used) {
1211 if (arr->len == arr->count || arr->len - arr->count < n) {
1212 rte_errno = ENOSPC;
1213 goto out;
1214 }
1215 if (arr->count == 0) {
1216 ret = next ? start : start - n + 1;
1217 goto out;
1218 }
1219 } else {
1220 if (arr->count < n) {
1221 rte_errno = ENOENT;
1222 goto out;
1223 }
1224 if (arr->count == arr->len) {
1225 ret = next ? start : start - n + 1;
1226 goto out;
1227 }
1228 }
1229
1230 if (next)
1231 ret = find_next_n(arr, start, n, used);
1232 else
1233 ret = find_prev_n(arr, start, n, used);
1234 out:
1235 rte_rwlock_read_unlock(&arr->rwlock);
1236 return ret;
1237 }
1238
1239 int __rte_experimental
1240 rte_fbarray_find_next_n_free(struct rte_fbarray *arr, unsigned int start,
1241 unsigned int n)
1242 {
1243 return fbarray_find_n(arr, start, n, true, false);
1244 }
1245
1246 int __rte_experimental
1247 rte_fbarray_find_next_n_used(struct rte_fbarray *arr, unsigned int start,
1248 unsigned int n)
1249 {
1250 return fbarray_find_n(arr, start, n, true, true);
1251 }
1252
1253 int __rte_experimental
1254 rte_fbarray_find_prev_n_free(struct rte_fbarray *arr, unsigned int start,
1255 unsigned int n)
1256 {
1257 return fbarray_find_n(arr, start, n, false, false);
1258 }
1259
1260 int __rte_experimental
1261 rte_fbarray_find_prev_n_used(struct rte_fbarray *arr, unsigned int start,
1262 unsigned int n)
1263 {
1264 return fbarray_find_n(arr, start, n, false, true);
1265 }
1266
1267 static int
1268 fbarray_find_contig(struct rte_fbarray *arr, unsigned int start, bool next,
1269 bool used)
1270 {
1271 int ret = -1;
1272
1273 if (arr == NULL || start >= arr->len) {
1274 rte_errno = EINVAL;
1275 return -1;
1276 }
1277
1278 /* prevent array from changing under us */
1279 rte_rwlock_read_lock(&arr->rwlock);
1280
1281 /* cheap checks to prevent doing useless work */
1282 if (used) {
1283 if (arr->count == 0) {
1284 ret = 0;
1285 goto out;
1286 }
1287 if (next && arr->count == arr->len) {
1288 ret = arr->len - start;
1289 goto out;
1290 }
1291 if (!next && arr->count == arr->len) {
1292 ret = start + 1;
1293 goto out;
1294 }
1295 } else {
1296 if (arr->len == arr->count) {
1297 ret = 0;
1298 goto out;
1299 }
1300 if (next && arr->count == 0) {
1301 ret = arr->len - start;
1302 goto out;
1303 }
1304 if (!next && arr->count == 0) {
1305 ret = start + 1;
1306 goto out;
1307 }
1308 }
1309
1310 if (next)
1311 ret = find_contig(arr, start, used);
1312 else
1313 ret = find_rev_contig(arr, start, used);
1314 out:
1315 rte_rwlock_read_unlock(&arr->rwlock);
1316 return ret;
1317 }
1318
1319 static int
1320 fbarray_find_biggest(struct rte_fbarray *arr, unsigned int start, bool used,
1321 bool rev)
1322 {
1323 int cur_idx, next_idx, cur_len, biggest_idx, biggest_len;
1324 /* don't stack if conditions, use function pointers instead */
1325 int (*find_func)(struct rte_fbarray *, unsigned int);
1326 int (*find_contig_func)(struct rte_fbarray *, unsigned int);
1327
1328 if (arr == NULL || start >= arr->len) {
1329 rte_errno = EINVAL;
1330 return -1;
1331 }
1332 /* the other API calls already do their fair share of cheap checks, so
1333 * no need to do them here.
1334 */
1335
1336 /* the API's called are thread-safe, but something may still happen
1337 * inbetween the API calls, so lock the fbarray. all other API's are
1338 * read-locking the fbarray, so read lock here is OK.
1339 */
1340 rte_rwlock_read_lock(&arr->rwlock);
1341
1342 /* pick out appropriate functions */
1343 if (used) {
1344 if (rev) {
1345 find_func = rte_fbarray_find_prev_used;
1346 find_contig_func = rte_fbarray_find_rev_contig_used;
1347 } else {
1348 find_func = rte_fbarray_find_next_used;
1349 find_contig_func = rte_fbarray_find_contig_used;
1350 }
1351 } else {
1352 if (rev) {
1353 find_func = rte_fbarray_find_prev_free;
1354 find_contig_func = rte_fbarray_find_rev_contig_free;
1355 } else {
1356 find_func = rte_fbarray_find_next_free;
1357 find_contig_func = rte_fbarray_find_contig_free;
1358 }
1359 }
1360
1361 cur_idx = start;
1362 biggest_idx = -1; /* default is error */
1363 biggest_len = 0;
1364 for (;;) {
1365 cur_idx = find_func(arr, cur_idx);
1366
1367 /* block found, check its length */
1368 if (cur_idx >= 0) {
1369 cur_len = find_contig_func(arr, cur_idx);
1370 /* decide where we go next */
1371 next_idx = rev ? cur_idx - cur_len : cur_idx + cur_len;
1372 /* move current index to start of chunk */
1373 cur_idx = rev ? next_idx + 1 : cur_idx;
1374
1375 if (cur_len > biggest_len) {
1376 biggest_idx = cur_idx;
1377 biggest_len = cur_len;
1378 }
1379 cur_idx = next_idx;
1380 /* in reverse mode, next_idx may be -1 if chunk started
1381 * at array beginning. this means there's no more work
1382 * to do.
1383 */
1384 if (cur_idx < 0)
1385 break;
1386 } else {
1387 /* nothing more to find, stop. however, a failed API
1388 * call has set rte_errno, which we want to ignore, as
1389 * reaching the end of fbarray is not an error.
1390 */
1391 rte_errno = 0;
1392 break;
1393 }
1394 }
1395 /* if we didn't find anything at all, set rte_errno */
1396 if (biggest_idx < 0)
1397 rte_errno = used ? ENOENT : ENOSPC;
1398
1399 rte_rwlock_read_unlock(&arr->rwlock);
1400 return biggest_idx;
1401 }
1402
1403 int __rte_experimental
1404 rte_fbarray_find_biggest_free(struct rte_fbarray *arr, unsigned int start)
1405 {
1406 return fbarray_find_biggest(arr, start, false, false);
1407 }
1408
1409 int __rte_experimental
1410 rte_fbarray_find_biggest_used(struct rte_fbarray *arr, unsigned int start)
1411 {
1412 return fbarray_find_biggest(arr, start, true, false);
1413 }
1414
1415 int __rte_experimental
1416 rte_fbarray_find_rev_biggest_free(struct rte_fbarray *arr, unsigned int start)
1417 {
1418 return fbarray_find_biggest(arr, start, false, true);
1419 }
1420
1421 int __rte_experimental
1422 rte_fbarray_find_rev_biggest_used(struct rte_fbarray *arr, unsigned int start)
1423 {
1424 return fbarray_find_biggest(arr, start, true, true);
1425 }
1426
1427
1428 int __rte_experimental
1429 rte_fbarray_find_contig_free(struct rte_fbarray *arr, unsigned int start)
1430 {
1431 return fbarray_find_contig(arr, start, true, false);
1432 }
1433
1434 int __rte_experimental
1435 rte_fbarray_find_contig_used(struct rte_fbarray *arr, unsigned int start)
1436 {
1437 return fbarray_find_contig(arr, start, true, true);
1438 }
1439
1440 int __rte_experimental
1441 rte_fbarray_find_rev_contig_free(struct rte_fbarray *arr, unsigned int start)
1442 {
1443 return fbarray_find_contig(arr, start, false, false);
1444 }
1445
1446 int __rte_experimental
1447 rte_fbarray_find_rev_contig_used(struct rte_fbarray *arr, unsigned int start)
1448 {
1449 return fbarray_find_contig(arr, start, false, true);
1450 }
1451
1452 int __rte_experimental
1453 rte_fbarray_find_idx(const struct rte_fbarray *arr, const void *elt)
1454 {
1455 void *end;
1456 int ret = -1;
1457
1458 /*
1459 * no need to synchronize as it doesn't matter if underlying data
1460 * changes - we're doing pointer arithmetic here.
1461 */
1462
1463 if (arr == NULL || elt == NULL) {
1464 rte_errno = EINVAL;
1465 return -1;
1466 }
1467 end = RTE_PTR_ADD(arr->data, arr->elt_sz * arr->len);
1468 if (elt < arr->data || elt >= end) {
1469 rte_errno = EINVAL;
1470 return -1;
1471 }
1472
1473 ret = RTE_PTR_DIFF(elt, arr->data) / arr->elt_sz;
1474
1475 return ret;
1476 }
1477
1478 void __rte_experimental
1479 rte_fbarray_dump_metadata(struct rte_fbarray *arr, FILE *f)
1480 {
1481 struct used_mask *msk;
1482 unsigned int i;
1483
1484 if (arr == NULL || f == NULL) {
1485 rte_errno = EINVAL;
1486 return;
1487 }
1488
1489 if (fully_validate(arr->name, arr->elt_sz, arr->len)) {
1490 fprintf(f, "Invalid file-backed array\n");
1491 goto out;
1492 }
1493
1494 /* prevent array from changing under us */
1495 rte_rwlock_read_lock(&arr->rwlock);
1496
1497 fprintf(f, "File-backed array: %s\n", arr->name);
1498 fprintf(f, "size: %i occupied: %i elt_sz: %i\n",
1499 arr->len, arr->count, arr->elt_sz);
1500
1501 msk = get_used_mask(arr->data, arr->elt_sz, arr->len);
1502
1503 for (i = 0; i < msk->n_masks; i++)
1504 fprintf(f, "msk idx %i: 0x%016" PRIx64 "\n", i, msk->data[i]);
1505 out:
1506 rte_rwlock_read_unlock(&arr->rwlock);
1507 }