]>
Commit | Line | Data |
---|---|---|
11fdf7f2 TL |
1 | /* SPDX-License-Identifier: BSD-3-Clause |
2 | * Copyright(c) 2017 Intel Corporation | |
3 | */ | |
4 | ||
f67539c2 TL |
5 | #ifndef _DIST_PRIV_H_ |
6 | #define _DIST_PRIV_H_ | |
11fdf7f2 TL |
7 | |
8 | /** | |
9 | * @file | |
10 | * RTE distributor | |
11 | * | |
12 | * The distributor is a component which is designed to pass packets | |
13 | * one-at-a-time to workers, with dynamic load balancing. | |
14 | */ | |
15 | ||
16 | #ifdef __cplusplus | |
17 | extern "C" { | |
18 | #endif | |
19 | ||
20 | #define NO_FLAGS 0 | |
21 | #define RTE_DISTRIB_PREFIX "DT_" | |
22 | ||
23 | /* | |
24 | * We will use the bottom four bits of the pointer for flags, shifting out | |
25 | * the top four bits to make room (since a 64-bit pointer actually only uses | |
26 | * 48 bits). An arithmetic-right-shift will then appropriately restore the | |
27 | * original pointer value with proper sign extension into the top bits. | |
28 | */ | |
29 | #define RTE_DISTRIB_FLAG_BITS 4 /**< number of low pointer bits used for flags */ | |
30 | #define RTE_DISTRIB_FLAGS_MASK (0x0F) /**< mask covering the four flag bits */ | |
31 | #define RTE_DISTRIB_NO_BUF 0 /**< empty flags: no buffer requested */ | |
32 | #define RTE_DISTRIB_GET_BUF (1) /**< worker requests a buffer, returns old */ | |
33 | #define RTE_DISTRIB_RETURN_BUF (2) /**< worker returns a buffer, no request */ | |
34 | #define RTE_DISTRIB_VALID_BUF (4) /**< set if bufptr contains ptr */ | |
35 | ||
36 | #define RTE_DISTRIB_BACKLOG_SIZE 8 /**< backlog size; a power of two so that SIZE - 1 is an all-ones mask */ | |
37 | #define RTE_DISTRIB_BACKLOG_MASK (RTE_DISTRIB_BACKLOG_SIZE - 1) /**< mask derived from the backlog size */ | |
38 | ||
39 | #define RTE_DISTRIB_MAX_RETURNS 128 /**< capacity of mbufs[] in struct rte_distributor_returned_pkts */ | |
40 | #define RTE_DISTRIB_RETURNS_MASK (RTE_DISTRIB_MAX_RETURNS - 1) /**< mask derived from RTE_DISTRIB_MAX_RETURNS */ | |
41 | ||
42 | /** | |
43 | * Maximum number of workers allowed. | |
9f95a23c | 44 | * Take care when increasing this limit, because it is constrained by how we track
11fdf7f2 TL |
45 | * in-flight tags. See in_flight_bitmask and rte_distributor_process |
46 | */ | |
47 | #define RTE_DISTRIB_MAX_WORKERS 64 | |
48 | ||
49 | #define RTE_DISTRIBUTOR_NAMESIZE 32 /**< Length of name for instance */ | |
50 | ||
51 | /** | |
52 | * Buffer structure used to pass the pointer data between cores. This is cache | |
53 | * line aligned, but to improve performance and prevent adjacent cache-line | |
54 | * prefetches of buffers for other workers, e.g. when worker 1's buffer is on | |
55 | * the next cache line to worker 0, we pad this out to three cache lines. | |
56 | * Only 64-bits of the memory is actually used though. | |
57 | */ | |
f67539c2 | 58 | union rte_distributor_buffer_single {
11fdf7f2 TL |
59 | volatile int64_t bufptr64; /* the only 64 bits of the union that are actually used */
60 | char pad[RTE_CACHE_LINE_SIZE*3]; /* pads the union out to three cache lines */ | |
61 | } __rte_cache_aligned; | |
62 | ||
63 | /* | |
64 | * Transfer up to 8 mbufs at a time to/from workers, and | |
65 | * flow matching algorithm optimized for 8 flow IDs at a time | |
66 | */ | |
67 | #define RTE_DIST_BURST_SIZE 8 | |
68 | ||
69 | struct rte_distributor_backlog { /* backlog of packets queued for one worker slot */ | |
70 | unsigned int start; /* NOTE(review): presumably index of the first pending entry in pkts[] - confirm */ | |
71 | unsigned int count; /* NOTE(review): presumably number of pending entries in pkts[] - confirm */ | |
72 | int64_t pkts[RTE_DIST_BURST_SIZE] __rte_cache_aligned; /* up to RTE_DIST_BURST_SIZE packet values, cache-aligned */ | |
73 | uint16_t *tags; /* will point to second cacheline of inflights */ | |
74 | } __rte_cache_aligned; | |
75 | ||
76 | ||
77 | struct rte_distributor_returned_pkts { /* storage for mbufs handed back by workers */ | |
78 | unsigned int start; /* NOTE(review): presumably index of the oldest entry in mbufs[] - confirm */ | |
79 | unsigned int count; /* NOTE(review): presumably number of valid entries in mbufs[] - confirm */ | |
80 | struct rte_mbuf *mbufs[RTE_DISTRIB_MAX_RETURNS]; /* holds up to RTE_DISTRIB_MAX_RETURNS mbuf pointers */ | |
81 | }; | |
82 | ||
f67539c2 TL |
83 | struct rte_distributor_single {
84 | TAILQ_ENTRY(rte_distributor_single) next; /**< Next in list. */ | |
11fdf7f2 TL |
85 |
86 | char name[RTE_DISTRIBUTOR_NAMESIZE]; /**< Name of the distributor instance. */ | |
87 | unsigned int num_workers; /**< Number of workers polling */ | |
88 | ||
89 | uint32_t in_flight_tags[RTE_DISTRIB_MAX_WORKERS]; | |
90 | /**< Tracks the tag being processed per core */ | |
91 | uint64_t in_flight_bitmask; | |
92 | /**< on/off bits for in-flight tags. | |
93 | * Note that if RTE_DISTRIB_MAX_WORKERS is larger than 64 then | |
94 | * the bitmask has to expand. | |
95 | */ | |
96 | ||
97 | struct rte_distributor_backlog backlog[RTE_DISTRIB_MAX_WORKERS]; /**< One backlog per worker slot */ | |
98 | ||
f67539c2 | 99 | union rte_distributor_buffer_single bufs[RTE_DISTRIB_MAX_WORKERS]; /**< One padded 64-bit exchange buffer per worker slot */
11fdf7f2 TL |
100 |
101 | struct rte_distributor_returned_pkts returns; /**< Returned-packet storage (see struct rte_distributor_returned_pkts) */ | |
102 | }; | |
103 | ||
104 | /* Identifiers for the available signature compare (match) functions */ | |
105 | enum rte_distributor_match_function { | |
106 | RTE_DIST_MATCH_SCALAR = 0, /* NOTE(review): presumably selects find_match_scalar() - confirm */ | |
107 | RTE_DIST_MATCH_VECTOR, /* NOTE(review): presumably selects find_match_vec() - confirm */ | |
108 | RTE_DIST_NUM_MATCH_FNS /* number of match functions; must remain the last enumerator */ | |
109 | }; | |
110 | ||
111 | /** | |
112 | * Buffer structure used to pass the pointer data between cores. This is cache | |
113 | * line aligned, but to improve performance and prevent adjacent cache-line | |
114 | * prefetches of buffers for other workers, e.g. when worker 1's buffer is on | |
115 | * the next cache line to worker 0, we pad this out to two cache lines. | |
116 | * We can pass up to 8 mbufs at a time in one cacheline. | |
117 | * There is a separate cacheline for returns in the burst API. | |
118 | */ | |
119 | struct rte_distributor_buffer { /* every member below is declared cache-aligned */ | |
120 | volatile int64_t bufptr64[RTE_DIST_BURST_SIZE] | |
121 | __rte_cache_aligned; /* <= outgoing to worker */ | |
122 | ||
123 | int64_t pad1 __rte_cache_aligned; /* <= padding: one cache line between bufptr64 and retptr64 */ | |
124 | ||
125 | volatile int64_t retptr64[RTE_DIST_BURST_SIZE] | |
126 | __rte_cache_aligned; /* <= incoming from worker */ | |
127 | ||
128 | int64_t pad2 __rte_cache_aligned; /* <= padding: one cache line between retptr64 and count */ | |
129 | ||
130 | int count __rte_cache_aligned; /* <= number of current mbufs */ | |
131 | }; | |
132 | ||
133 | struct rte_distributor { | |
134 | TAILQ_ENTRY(rte_distributor) next; /**< Next in list. */ | |
135 | ||
136 | char name[RTE_DISTRIBUTOR_NAMESIZE]; /**< Name of the distributor instance. */ | |
137 | unsigned int num_workers; /**< Number of workers polling */ | |
138 | unsigned int alg_type; /**< Algorithm type in use; NOTE(review): presumably not a count - confirm */ | |
139 | ||
140 | /** | |
141 | * The first cache line in this array holds the tags in flight | |
142 | * on the worker core. The second cache line holds the backlog | |
143 | * that is going to go to the worker core. | |
144 | */ | |
145 | uint16_t in_flight_tags[RTE_DISTRIB_MAX_WORKERS][RTE_DIST_BURST_SIZE*2] | |
146 | __rte_cache_aligned; | |
147 | ||
148 | struct rte_distributor_backlog backlog[RTE_DISTRIB_MAX_WORKERS] | |
149 | __rte_cache_aligned; /**< One backlog per worker slot */ | |
150 | ||
151 | struct rte_distributor_buffer bufs[RTE_DISTRIB_MAX_WORKERS]; /**< One burst exchange buffer per worker slot */ | |
152 | ||
153 | struct rte_distributor_returned_pkts returns; /**< Returned-packet storage (see struct rte_distributor_returned_pkts) */ | |
154 | ||
155 | enum rte_distributor_match_function dist_match_fn; /**< Which match function to use (scalar or vector) */ | |
156 | ||
f67539c2 | 157 | struct rte_distributor_single *d_single; /**< NOTE(review): presumably the instance backing the single-packet mode - confirm */
11fdf7f2 TL |
158 | }; |
159 | ||
160 | void | |
161 | find_match_scalar(struct rte_distributor *d, | |
162 | uint16_t *data_ptr, | |
163 | uint16_t *output_ptr); /* scalar signature-compare routine, defined elsewhere; NOTE(review): presumably the one chosen by RTE_DIST_MATCH_SCALAR - confirm */ | |
164 | ||
165 | void | |
166 | find_match_vec(struct rte_distributor *d, | |
167 | uint16_t *data_ptr, | |
168 | uint16_t *output_ptr); /* vector signature-compare routine, defined elsewhere; NOTE(review): presumably the one chosen by RTE_DIST_MATCH_VECTOR - confirm */ | |
169 | ||
170 | #ifdef __cplusplus | |
171 | } | |
172 | #endif | |
173 | ||
f67539c2 | 174 | #endif /* _DIST_PRIV_H_ */ |