]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /*- |
2 | * BSD LICENSE | |
3 | * | |
4 | * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. | |
5 | * All rights reserved. | |
6 | * | |
7 | * Redistribution and use in source and binary forms, with or without | |
8 | * modification, are permitted provided that the following conditions | |
9 | * are met: | |
10 | * | |
11 | * * Redistributions of source code must retain the above copyright | |
12 | * notice, this list of conditions and the following disclaimer. | |
13 | * * Redistributions in binary form must reproduce the above copyright | |
14 | * notice, this list of conditions and the following disclaimer in | |
15 | * the documentation and/or other materials provided with the | |
16 | * distribution. | |
17 | * * Neither the name of Intel Corporation nor the names of its | |
18 | * contributors may be used to endorse or promote products derived | |
19 | * from this software without specific prior written permission. | |
20 | * | |
21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
22 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
23 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
24 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
25 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
26 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
27 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
28 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
29 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
30 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
31 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
32 | */ | |
33 | ||
34 | #ifndef __RTE_RED_H_INCLUDED__ | |
35 | #define __RTE_RED_H_INCLUDED__ | |
36 | ||
37 | #ifdef __cplusplus | |
38 | extern "C" { | |
39 | #endif | |
40 | ||
41 | /** | |
42 | * @file | |
43 | * RTE Random Early Detection (RED) | |
44 | * | |
45 | * | |
46 | ***/ | |
47 | ||
48 | #include <stdint.h> | |
49 | #include <limits.h> | |
50 | #include <rte_common.h> | |
51 | #include <rte_debug.h> | |
52 | #include <rte_cycles.h> | |
53 | #include <rte_branch_prediction.h> | |
54 | ||
55 | #define RTE_RED_SCALING 10 /**< Fraction size for fixed-point */ | |
56 | #define RTE_RED_S (1 << 22) /**< Packet size multiplied by number of leaf queues */ | |
57 | #define RTE_RED_MAX_TH_MAX 1023 /**< Max threshold limit in fixed point format */ | |
58 | #define RTE_RED_WQ_LOG2_MIN 1 /**< Min inverse filter weight value */ | |
59 | #define RTE_RED_WQ_LOG2_MAX 12 /**< Max inverse filter weight value */ | |
60 | #define RTE_RED_MAXP_INV_MIN 1 /**< Min inverse mark probability value */ | |
61 | #define RTE_RED_MAXP_INV_MAX 255 /**< Max inverse mark probability value */ | |
62 | #define RTE_RED_2POW16 (1<<16) /**< 2 power 16 */ | |
63 | #define RTE_RED_INT16_NBITS (sizeof(uint16_t) * CHAR_BIT) | |
64 | #define RTE_RED_WQ_LOG2_NUM (RTE_RED_WQ_LOG2_MAX - RTE_RED_WQ_LOG2_MIN + 1) | |
65 | ||
66 | /** | |
67 | * Externs | |
68 | * | |
69 | */ | |
70 | extern uint32_t rte_red_rand_val; | |
71 | extern uint32_t rte_red_rand_seed; | |
72 | extern uint16_t rte_red_log2_1_minus_Wq[RTE_RED_WQ_LOG2_NUM]; | |
73 | extern uint16_t rte_red_pow2_frac_inv[16]; | |
74 | ||
75 | /** | |
76 | * RED configuration parameters passed by user | |
77 | * | |
78 | */ | |
79 | struct rte_red_params { | |
80 | uint16_t min_th; /**< Minimum threshold for queue (max_th) */ | |
81 | uint16_t max_th; /**< Maximum threshold for queue (max_th) */ | |
82 | uint16_t maxp_inv; /**< Inverse of packet marking probability maximum value (maxp = 1 / maxp_inv) */ | |
83 | uint16_t wq_log2; /**< Negated log2 of queue weight (wq = 1 / (2 ^ wq_log2)) */ | |
84 | }; | |
85 | ||
86 | /** | |
87 | * RED configuration parameters | |
88 | */ | |
89 | struct rte_red_config { | |
90 | uint32_t min_th; /**< min_th scaled in fixed-point format */ | |
91 | uint32_t max_th; /**< max_th scaled in fixed-point format */ | |
92 | uint32_t pa_const; /**< Precomputed constant value used for pa calculation (scaled in fixed-point format) */ | |
93 | uint8_t maxp_inv; /**< maxp_inv */ | |
94 | uint8_t wq_log2; /**< wq_log2 */ | |
95 | }; | |
96 | ||
97 | /** | |
98 | * RED run-time data | |
99 | */ | |
100 | struct rte_red { | |
101 | uint32_t avg; /**< Average queue size (avg), scaled in fixed-point format */ | |
102 | uint32_t count; /**< Number of packets since last marked packet (count) */ | |
103 | uint64_t q_time; /**< Start of the queue idle time (q_time) */ | |
104 | }; | |
105 | ||
106 | /** | |
107 | * @brief Initialises run-time data | |
108 | * | |
109 | * @param red [in,out] data pointer to RED runtime data | |
110 | * | |
111 | * @return Operation status | |
112 | * @retval 0 success | |
113 | * @retval !0 error | |
114 | */ | |
115 | int | |
116 | rte_red_rt_data_init(struct rte_red *red); | |
117 | ||
118 | /** | |
119 | * @brief Configures a single RED configuration parameter structure. | |
120 | * | |
121 | * @param red_cfg [in,out] config pointer to a RED configuration parameter structure | |
122 | * @param wq_log2 [in] log2 of the filter weight, valid range is: | |
123 | * RTE_RED_WQ_LOG2_MIN <= wq_log2 <= RTE_RED_WQ_LOG2_MAX | |
124 | * @param min_th [in] queue minimum threshold in number of packets | |
125 | * @param max_th [in] queue maximum threshold in number of packets | |
126 | * @param maxp_inv [in] inverse maximum mark probability | |
127 | * | |
128 | * @return Operation status | |
129 | * @retval 0 success | |
130 | * @retval !0 error | |
131 | */ | |
132 | int | |
133 | rte_red_config_init(struct rte_red_config *red_cfg, | |
134 | const uint16_t wq_log2, | |
135 | const uint16_t min_th, | |
136 | const uint16_t max_th, | |
137 | const uint16_t maxp_inv); | |
138 | ||
139 | /** | |
140 | * @brief Generate random number for RED | |
141 | * | |
142 | * Implemenetation based on: | |
143 | * http://software.intel.com/en-us/articles/fast-random-number-generator-on-the-intel-pentiumr-4-processor/ | |
144 | * | |
145 | * 10 bit shift has been found through empirical tests (was 16). | |
146 | * | |
147 | * @return Random number between 0 and (2^22 - 1) | |
148 | */ | |
149 | static inline uint32_t | |
150 | rte_fast_rand(void) | |
151 | { | |
152 | rte_red_rand_seed = (214013 * rte_red_rand_seed) + 2531011; | |
153 | return rte_red_rand_seed >> 10; | |
154 | } | |
155 | ||
156 | /** | |
157 | * @brief calculate factor to scale average queue size when queue | |
158 | * becomes empty | |
159 | * | |
160 | * @param wq_log2 [in] where EWMA filter weight wq = 1/(2 ^ wq_log2) | |
161 | * @param m [in] exponent in the computed value (1 - wq) ^ m | |
162 | * | |
163 | * @return computed value | |
164 | * @retval ((1 - wq) ^ m) scaled in fixed-point format | |
165 | */ | |
166 | static inline uint16_t | |
167 | __rte_red_calc_qempty_factor(uint8_t wq_log2, uint16_t m) | |
168 | { | |
169 | uint32_t n = 0; | |
170 | uint32_t f = 0; | |
171 | ||
172 | /** | |
173 | * Basic math tells us that: | |
174 | * a^b = 2^(b * log2(a) ) | |
175 | * | |
176 | * in our case: | |
177 | * a = (1-Wq) | |
178 | * b = m | |
179 | * Wq = 1/ (2^log2n) | |
180 | * | |
181 | * So we are computing this equation: | |
182 | * factor = 2 ^ ( m * log2(1-Wq)) | |
183 | * | |
184 | * First we are computing: | |
185 | * n = m * log2(1-Wq) | |
186 | * | |
187 | * To avoid dealing with signed numbers log2 values are positive | |
188 | * but they should be negative because (1-Wq) is always < 1. | |
189 | * Contents of log2 table values are also scaled for precision. | |
190 | */ | |
191 | ||
192 | n = m * rte_red_log2_1_minus_Wq[wq_log2 - RTE_RED_WQ_LOG2_MIN]; | |
193 | ||
194 | /** | |
195 | * The tricky part is computing 2^n, for this I split n into | |
196 | * integer part and fraction part. | |
197 | * f - is fraction part of n | |
198 | * n - is integer part of original n | |
199 | * | |
200 | * Now using basic math we compute 2^n: | |
201 | * 2^(f+n) = 2^f * 2^n | |
202 | * 2^f - we use lookup table | |
203 | * 2^n - can be replaced with bit shift right oeprations | |
204 | */ | |
205 | ||
206 | f = (n >> 6) & 0xf; | |
207 | n >>= 10; | |
208 | ||
209 | if (n < RTE_RED_SCALING) | |
210 | return (uint16_t) ((rte_red_pow2_frac_inv[f] + (1 << (n - 1))) >> n); | |
211 | ||
212 | return 0; | |
213 | } | |
214 | ||
215 | /** | |
216 | * @brief Updates queue average in condition when queue is empty | |
217 | * | |
218 | * Note: packet is never dropped in this particular case. | |
219 | * | |
220 | * @param red_cfg [in] config pointer to a RED configuration parameter structure | |
221 | * @param red [in,out] data pointer to RED runtime data | |
222 | * @param time [in] current time stamp | |
223 | * | |
224 | * @return Operation status | |
225 | * @retval 0 enqueue the packet | |
226 | * @retval 1 drop the packet based on max threshold criterion | |
227 | * @retval 2 drop the packet based on mark probability criterion | |
228 | */ | |
229 | static inline int | |
230 | rte_red_enqueue_empty(const struct rte_red_config *red_cfg, | |
231 | struct rte_red *red, | |
232 | const uint64_t time) | |
233 | { | |
234 | uint64_t time_diff = 0, m = 0; | |
235 | ||
236 | RTE_ASSERT(red_cfg != NULL); | |
237 | RTE_ASSERT(red != NULL); | |
238 | ||
239 | red->count ++; | |
240 | ||
241 | /** | |
242 | * We compute avg but we don't compare avg against | |
243 | * min_th or max_th, nor calculate drop probability | |
244 | */ | |
245 | time_diff = time - red->q_time; | |
246 | ||
247 | /** | |
248 | * m is the number of packets that might have arrived while the queue was empty. | |
249 | * In this case we have time stamps provided by scheduler in byte units (bytes | |
250 | * transmitted on network port). Such time stamp translates into time units as | |
251 | * port speed is fixed but such approach simplifies the code. | |
252 | */ | |
253 | m = time_diff / RTE_RED_S; | |
254 | ||
255 | /** | |
256 | * Check that m will fit into 16-bit unsigned integer | |
257 | */ | |
258 | if (m >= RTE_RED_2POW16) { | |
259 | red->avg = 0; | |
260 | } else { | |
261 | red->avg = (red->avg >> RTE_RED_SCALING) * __rte_red_calc_qempty_factor(red_cfg->wq_log2, (uint16_t) m); | |
262 | } | |
263 | ||
264 | return 0; | |
265 | } | |
266 | ||
267 | /** | |
268 | * Drop probability (Sally Floyd and Van Jacobson): | |
269 | * | |
270 | * pb = (1 / maxp_inv) * (avg - min_th) / (max_th - min_th) | |
271 | * pa = pb / (2 - count * pb) | |
272 | * | |
273 | * | |
274 | * (1 / maxp_inv) * (avg - min_th) | |
275 | * --------------------------------- | |
276 | * max_th - min_th | |
277 | * pa = ----------------------------------------------- | |
278 | * count * (1 / maxp_inv) * (avg - min_th) | |
279 | * 2 - ----------------------------------------- | |
280 | * max_th - min_th | |
281 | * | |
282 | * | |
283 | * avg - min_th | |
284 | * pa = ----------------------------------------------------------- | |
285 | * 2 * (max_th - min_th) * maxp_inv - count * (avg - min_th) | |
286 | * | |
287 | * | |
288 | * We define pa_const as: pa_const = 2 * (max_th - min_th) * maxp_inv. Then: | |
289 | * | |
290 | * | |
291 | * avg - min_th | |
292 | * pa = ----------------------------------- | |
293 | * pa_const - count * (avg - min_th) | |
294 | */ | |
295 | ||
296 | /** | |
297 | * @brief make a decision to drop or enqueue a packet based on mark probability | |
298 | * criteria | |
299 | * | |
300 | * @param red_cfg [in] config pointer to structure defining RED parameters | |
301 | * @param red [in,out] data pointer to RED runtime data | |
302 | * | |
303 | * @return operation status | |
304 | * @retval 0 enqueue the packet | |
305 | * @retval 1 drop the packet | |
306 | */ | |
307 | static inline int | |
308 | __rte_red_drop(const struct rte_red_config *red_cfg, struct rte_red *red) | |
309 | { | |
310 | uint32_t pa_num = 0; /* numerator of drop-probability */ | |
311 | uint32_t pa_den = 0; /* denominator of drop-probability */ | |
312 | uint32_t pa_num_count = 0; | |
313 | ||
314 | pa_num = (red->avg - red_cfg->min_th) >> (red_cfg->wq_log2); | |
315 | ||
316 | pa_num_count = red->count * pa_num; | |
317 | ||
318 | if (red_cfg->pa_const <= pa_num_count) | |
319 | return 1; | |
320 | ||
321 | pa_den = red_cfg->pa_const - pa_num_count; | |
322 | ||
323 | /* If drop, generate and save random number to be used next time */ | |
324 | if (unlikely((rte_red_rand_val % pa_den) < pa_num)) { | |
325 | rte_red_rand_val = rte_fast_rand(); | |
326 | ||
327 | return 1; | |
328 | } | |
329 | ||
330 | /* No drop */ | |
331 | return 0; | |
332 | } | |
333 | ||
334 | /** | |
335 | * @brief Decides if new packet should be enqeued or dropped in queue non-empty case | |
336 | * | |
337 | * @param red_cfg [in] config pointer to a RED configuration parameter structure | |
338 | * @param red [in,out] data pointer to RED runtime data | |
339 | * @param q [in] current queue size (measured in packets) | |
340 | * | |
341 | * @return Operation status | |
342 | * @retval 0 enqueue the packet | |
343 | * @retval 1 drop the packet based on max threshold criterion | |
344 | * @retval 2 drop the packet based on mark probability criterion | |
345 | */ | |
346 | static inline int | |
347 | rte_red_enqueue_nonempty(const struct rte_red_config *red_cfg, | |
348 | struct rte_red *red, | |
349 | const unsigned q) | |
350 | { | |
351 | RTE_ASSERT(red_cfg != NULL); | |
352 | RTE_ASSERT(red != NULL); | |
353 | ||
354 | /** | |
355 | * EWMA filter (Sally Floyd and Van Jacobson): | |
356 | * avg = (1 - wq) * avg + wq * q | |
357 | * avg = avg + q * wq - avg * wq | |
358 | * | |
359 | * We select: wq = 2^(-n). Let scaled version of avg be: avg_s = avg * 2^(N+n). We get: | |
360 | * avg_s = avg_s + q * 2^N - avg_s * 2^(-n) | |
361 | * | |
362 | * By using shift left/right operations, we get: | |
363 | * avg_s = avg_s + (q << N) - (avg_s >> n) | |
364 | * avg_s += (q << N) - (avg_s >> n) | |
365 | */ | |
366 | ||
367 | /* avg update */ | |
368 | red->avg += (q << RTE_RED_SCALING) - (red->avg >> red_cfg->wq_log2); | |
369 | ||
370 | /* avg < min_th: do not mark the packet */ | |
371 | if (red->avg < red_cfg->min_th) { | |
372 | red->count ++; | |
373 | return 0; | |
374 | } | |
375 | ||
376 | /* min_th <= avg < max_th: mark the packet with pa probability */ | |
377 | if (red->avg < red_cfg->max_th) { | |
378 | if (!__rte_red_drop(red_cfg, red)) { | |
379 | red->count ++; | |
380 | return 0; | |
381 | } | |
382 | ||
383 | red->count = 0; | |
384 | return 2; | |
385 | } | |
386 | ||
387 | /* max_th <= avg: always mark the packet */ | |
388 | red->count = 0; | |
389 | return 1; | |
390 | } | |
391 | ||
392 | /** | |
393 | * @brief Decides if new packet should be enqeued or dropped | |
394 | * Updates run time data based on new queue size value. | |
395 | * Based on new queue average and RED configuration parameters | |
396 | * gives verdict whether to enqueue or drop the packet. | |
397 | * | |
398 | * @param red_cfg [in] config pointer to a RED configuration parameter structure | |
399 | * @param red [in,out] data pointer to RED runtime data | |
400 | * @param q [in] updated queue size in packets | |
401 | * @param time [in] current time stamp | |
402 | * | |
403 | * @return Operation status | |
404 | * @retval 0 enqueue the packet | |
405 | * @retval 1 drop the packet based on max threshold criteria | |
406 | * @retval 2 drop the packet based on mark probability criteria | |
407 | */ | |
408 | static inline int | |
409 | rte_red_enqueue(const struct rte_red_config *red_cfg, | |
410 | struct rte_red *red, | |
411 | const unsigned q, | |
412 | const uint64_t time) | |
413 | { | |
414 | RTE_ASSERT(red_cfg != NULL); | |
415 | RTE_ASSERT(red != NULL); | |
416 | ||
417 | if (q != 0) { | |
418 | return rte_red_enqueue_nonempty(red_cfg, red, q); | |
419 | } else { | |
420 | return rte_red_enqueue_empty(red_cfg, red, time); | |
421 | } | |
422 | } | |
423 | ||
424 | /** | |
425 | * @brief Callback to records time that queue became empty | |
426 | * | |
427 | * @param red [in,out] data pointer to RED runtime data | |
428 | * @param time [in] current time stamp | |
429 | */ | |
430 | static inline void | |
431 | rte_red_mark_queue_empty(struct rte_red *red, const uint64_t time) | |
432 | { | |
433 | red->q_time = time; | |
434 | } | |
435 | ||
436 | #ifdef __cplusplus | |
437 | } | |
438 | #endif | |
439 | ||
440 | #endif /* __RTE_RED_H_INCLUDED__ */ |