]> git.proxmox.com Git - ceph.git/blame - ceph/src/dpdk/lib/librte_sched/rte_red.h
bump version to 12.2.12-pve1
[ceph.git] / ceph / src / dpdk / lib / librte_sched / rte_red.h
CommitLineData
7c673cae
FG
1/*-
2 * BSD LICENSE
3 *
4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34#ifndef __RTE_RED_H_INCLUDED__
35#define __RTE_RED_H_INCLUDED__
36
37#ifdef __cplusplus
38extern "C" {
39#endif
40
41/**
42 * @file
43 * RTE Random Early Detection (RED)
44 *
45 *
46 ***/
47
48#include <stdint.h>
49#include <limits.h>
50#include <rte_common.h>
51#include <rte_debug.h>
52#include <rte_cycles.h>
53#include <rte_branch_prediction.h>
54
55#define RTE_RED_SCALING 10 /**< Fraction size for fixed-point */
56#define RTE_RED_S (1 << 22) /**< Packet size multiplied by number of leaf queues */
57#define RTE_RED_MAX_TH_MAX 1023 /**< Max threshold limit in fixed point format */
58#define RTE_RED_WQ_LOG2_MIN 1 /**< Min inverse filter weight value */
59#define RTE_RED_WQ_LOG2_MAX 12 /**< Max inverse filter weight value */
60#define RTE_RED_MAXP_INV_MIN 1 /**< Min inverse mark probability value */
61#define RTE_RED_MAXP_INV_MAX 255 /**< Max inverse mark probability value */
62#define RTE_RED_2POW16 (1<<16) /**< 2 power 16 */
63#define RTE_RED_INT16_NBITS (sizeof(uint16_t) * CHAR_BIT)
64#define RTE_RED_WQ_LOG2_NUM (RTE_RED_WQ_LOG2_MAX - RTE_RED_WQ_LOG2_MIN + 1)
65
66/**
67 * Externs
68 *
69 */
70extern uint32_t rte_red_rand_val;
71extern uint32_t rte_red_rand_seed;
72extern uint16_t rte_red_log2_1_minus_Wq[RTE_RED_WQ_LOG2_NUM];
73extern uint16_t rte_red_pow2_frac_inv[16];
74
75/**
76 * RED configuration parameters passed by user
77 *
78 */
79struct rte_red_params {
80 uint16_t min_th; /**< Minimum threshold for queue (max_th) */
81 uint16_t max_th; /**< Maximum threshold for queue (max_th) */
82 uint16_t maxp_inv; /**< Inverse of packet marking probability maximum value (maxp = 1 / maxp_inv) */
83 uint16_t wq_log2; /**< Negated log2 of queue weight (wq = 1 / (2 ^ wq_log2)) */
84};
85
86/**
87 * RED configuration parameters
88 */
89struct rte_red_config {
90 uint32_t min_th; /**< min_th scaled in fixed-point format */
91 uint32_t max_th; /**< max_th scaled in fixed-point format */
92 uint32_t pa_const; /**< Precomputed constant value used for pa calculation (scaled in fixed-point format) */
93 uint8_t maxp_inv; /**< maxp_inv */
94 uint8_t wq_log2; /**< wq_log2 */
95};
96
97/**
98 * RED run-time data
99 */
100struct rte_red {
101 uint32_t avg; /**< Average queue size (avg), scaled in fixed-point format */
102 uint32_t count; /**< Number of packets since last marked packet (count) */
103 uint64_t q_time; /**< Start of the queue idle time (q_time) */
104};
105
106/**
107 * @brief Initialises run-time data
108 *
109 * @param red [in,out] data pointer to RED runtime data
110 *
111 * @return Operation status
112 * @retval 0 success
113 * @retval !0 error
114 */
115int
116rte_red_rt_data_init(struct rte_red *red);
117
118/**
119 * @brief Configures a single RED configuration parameter structure.
120 *
121 * @param red_cfg [in,out] config pointer to a RED configuration parameter structure
122 * @param wq_log2 [in] log2 of the filter weight, valid range is:
123 * RTE_RED_WQ_LOG2_MIN <= wq_log2 <= RTE_RED_WQ_LOG2_MAX
124 * @param min_th [in] queue minimum threshold in number of packets
125 * @param max_th [in] queue maximum threshold in number of packets
126 * @param maxp_inv [in] inverse maximum mark probability
127 *
128 * @return Operation status
129 * @retval 0 success
130 * @retval !0 error
131 */
132int
133rte_red_config_init(struct rte_red_config *red_cfg,
134 const uint16_t wq_log2,
135 const uint16_t min_th,
136 const uint16_t max_th,
137 const uint16_t maxp_inv);
138
139/**
140 * @brief Generate random number for RED
141 *
142 * Implemenetation based on:
143 * http://software.intel.com/en-us/articles/fast-random-number-generator-on-the-intel-pentiumr-4-processor/
144 *
145 * 10 bit shift has been found through empirical tests (was 16).
146 *
147 * @return Random number between 0 and (2^22 - 1)
148 */
149static inline uint32_t
150rte_fast_rand(void)
151{
152 rte_red_rand_seed = (214013 * rte_red_rand_seed) + 2531011;
153 return rte_red_rand_seed >> 10;
154}
155
156/**
157 * @brief calculate factor to scale average queue size when queue
158 * becomes empty
159 *
160 * @param wq_log2 [in] where EWMA filter weight wq = 1/(2 ^ wq_log2)
161 * @param m [in] exponent in the computed value (1 - wq) ^ m
162 *
163 * @return computed value
164 * @retval ((1 - wq) ^ m) scaled in fixed-point format
165 */
166static inline uint16_t
167__rte_red_calc_qempty_factor(uint8_t wq_log2, uint16_t m)
168{
169 uint32_t n = 0;
170 uint32_t f = 0;
171
172 /**
173 * Basic math tells us that:
174 * a^b = 2^(b * log2(a) )
175 *
176 * in our case:
177 * a = (1-Wq)
178 * b = m
179 * Wq = 1/ (2^log2n)
180 *
181 * So we are computing this equation:
182 * factor = 2 ^ ( m * log2(1-Wq))
183 *
184 * First we are computing:
185 * n = m * log2(1-Wq)
186 *
187 * To avoid dealing with signed numbers log2 values are positive
188 * but they should be negative because (1-Wq) is always < 1.
189 * Contents of log2 table values are also scaled for precision.
190 */
191
192 n = m * rte_red_log2_1_minus_Wq[wq_log2 - RTE_RED_WQ_LOG2_MIN];
193
194 /**
195 * The tricky part is computing 2^n, for this I split n into
196 * integer part and fraction part.
197 * f - is fraction part of n
198 * n - is integer part of original n
199 *
200 * Now using basic math we compute 2^n:
201 * 2^(f+n) = 2^f * 2^n
202 * 2^f - we use lookup table
203 * 2^n - can be replaced with bit shift right oeprations
204 */
205
206 f = (n >> 6) & 0xf;
207 n >>= 10;
208
209 if (n < RTE_RED_SCALING)
210 return (uint16_t) ((rte_red_pow2_frac_inv[f] + (1 << (n - 1))) >> n);
211
212 return 0;
213}
214
215/**
216 * @brief Updates queue average in condition when queue is empty
217 *
218 * Note: packet is never dropped in this particular case.
219 *
220 * @param red_cfg [in] config pointer to a RED configuration parameter structure
221 * @param red [in,out] data pointer to RED runtime data
222 * @param time [in] current time stamp
223 *
224 * @return Operation status
225 * @retval 0 enqueue the packet
226 * @retval 1 drop the packet based on max threshold criterion
227 * @retval 2 drop the packet based on mark probability criterion
228 */
229static inline int
230rte_red_enqueue_empty(const struct rte_red_config *red_cfg,
231 struct rte_red *red,
232 const uint64_t time)
233{
234 uint64_t time_diff = 0, m = 0;
235
236 RTE_ASSERT(red_cfg != NULL);
237 RTE_ASSERT(red != NULL);
238
239 red->count ++;
240
241 /**
242 * We compute avg but we don't compare avg against
243 * min_th or max_th, nor calculate drop probability
244 */
245 time_diff = time - red->q_time;
246
247 /**
248 * m is the number of packets that might have arrived while the queue was empty.
249 * In this case we have time stamps provided by scheduler in byte units (bytes
250 * transmitted on network port). Such time stamp translates into time units as
251 * port speed is fixed but such approach simplifies the code.
252 */
253 m = time_diff / RTE_RED_S;
254
255 /**
256 * Check that m will fit into 16-bit unsigned integer
257 */
258 if (m >= RTE_RED_2POW16) {
259 red->avg = 0;
260 } else {
261 red->avg = (red->avg >> RTE_RED_SCALING) * __rte_red_calc_qempty_factor(red_cfg->wq_log2, (uint16_t) m);
262 }
263
264 return 0;
265}
266
267/**
268 * Drop probability (Sally Floyd and Van Jacobson):
269 *
270 * pb = (1 / maxp_inv) * (avg - min_th) / (max_th - min_th)
271 * pa = pb / (2 - count * pb)
272 *
273 *
274 * (1 / maxp_inv) * (avg - min_th)
275 * ---------------------------------
276 * max_th - min_th
277 * pa = -----------------------------------------------
278 * count * (1 / maxp_inv) * (avg - min_th)
279 * 2 - -----------------------------------------
280 * max_th - min_th
281 *
282 *
283 * avg - min_th
284 * pa = -----------------------------------------------------------
285 * 2 * (max_th - min_th) * maxp_inv - count * (avg - min_th)
286 *
287 *
288 * We define pa_const as: pa_const = 2 * (max_th - min_th) * maxp_inv. Then:
289 *
290 *
291 * avg - min_th
292 * pa = -----------------------------------
293 * pa_const - count * (avg - min_th)
294 */
295
296/**
297 * @brief make a decision to drop or enqueue a packet based on mark probability
298 * criteria
299 *
300 * @param red_cfg [in] config pointer to structure defining RED parameters
301 * @param red [in,out] data pointer to RED runtime data
302 *
303 * @return operation status
304 * @retval 0 enqueue the packet
305 * @retval 1 drop the packet
306 */
307static inline int
308__rte_red_drop(const struct rte_red_config *red_cfg, struct rte_red *red)
309{
310 uint32_t pa_num = 0; /* numerator of drop-probability */
311 uint32_t pa_den = 0; /* denominator of drop-probability */
312 uint32_t pa_num_count = 0;
313
314 pa_num = (red->avg - red_cfg->min_th) >> (red_cfg->wq_log2);
315
316 pa_num_count = red->count * pa_num;
317
318 if (red_cfg->pa_const <= pa_num_count)
319 return 1;
320
321 pa_den = red_cfg->pa_const - pa_num_count;
322
323 /* If drop, generate and save random number to be used next time */
324 if (unlikely((rte_red_rand_val % pa_den) < pa_num)) {
325 rte_red_rand_val = rte_fast_rand();
326
327 return 1;
328 }
329
330 /* No drop */
331 return 0;
332}
333
334/**
335 * @brief Decides if new packet should be enqeued or dropped in queue non-empty case
336 *
337 * @param red_cfg [in] config pointer to a RED configuration parameter structure
338 * @param red [in,out] data pointer to RED runtime data
339 * @param q [in] current queue size (measured in packets)
340 *
341 * @return Operation status
342 * @retval 0 enqueue the packet
343 * @retval 1 drop the packet based on max threshold criterion
344 * @retval 2 drop the packet based on mark probability criterion
345 */
346static inline int
347rte_red_enqueue_nonempty(const struct rte_red_config *red_cfg,
348 struct rte_red *red,
349 const unsigned q)
350{
351 RTE_ASSERT(red_cfg != NULL);
352 RTE_ASSERT(red != NULL);
353
354 /**
355 * EWMA filter (Sally Floyd and Van Jacobson):
356 * avg = (1 - wq) * avg + wq * q
357 * avg = avg + q * wq - avg * wq
358 *
359 * We select: wq = 2^(-n). Let scaled version of avg be: avg_s = avg * 2^(N+n). We get:
360 * avg_s = avg_s + q * 2^N - avg_s * 2^(-n)
361 *
362 * By using shift left/right operations, we get:
363 * avg_s = avg_s + (q << N) - (avg_s >> n)
364 * avg_s += (q << N) - (avg_s >> n)
365 */
366
367 /* avg update */
368 red->avg += (q << RTE_RED_SCALING) - (red->avg >> red_cfg->wq_log2);
369
370 /* avg < min_th: do not mark the packet */
371 if (red->avg < red_cfg->min_th) {
372 red->count ++;
373 return 0;
374 }
375
376 /* min_th <= avg < max_th: mark the packet with pa probability */
377 if (red->avg < red_cfg->max_th) {
378 if (!__rte_red_drop(red_cfg, red)) {
379 red->count ++;
380 return 0;
381 }
382
383 red->count = 0;
384 return 2;
385 }
386
387 /* max_th <= avg: always mark the packet */
388 red->count = 0;
389 return 1;
390}
391
392/**
393 * @brief Decides if new packet should be enqeued or dropped
394 * Updates run time data based on new queue size value.
395 * Based on new queue average and RED configuration parameters
396 * gives verdict whether to enqueue or drop the packet.
397 *
398 * @param red_cfg [in] config pointer to a RED configuration parameter structure
399 * @param red [in,out] data pointer to RED runtime data
400 * @param q [in] updated queue size in packets
401 * @param time [in] current time stamp
402 *
403 * @return Operation status
404 * @retval 0 enqueue the packet
405 * @retval 1 drop the packet based on max threshold criteria
406 * @retval 2 drop the packet based on mark probability criteria
407 */
408static inline int
409rte_red_enqueue(const struct rte_red_config *red_cfg,
410 struct rte_red *red,
411 const unsigned q,
412 const uint64_t time)
413{
414 RTE_ASSERT(red_cfg != NULL);
415 RTE_ASSERT(red != NULL);
416
417 if (q != 0) {
418 return rte_red_enqueue_nonempty(red_cfg, red, q);
419 } else {
420 return rte_red_enqueue_empty(red_cfg, red, time);
421 }
422}
423
424/**
425 * @brief Callback to records time that queue became empty
426 *
427 * @param red [in,out] data pointer to RED runtime data
428 * @param time [in] current time stamp
429 */
430static inline void
431rte_red_mark_queue_empty(struct rte_red *red, const uint64_t time)
432{
433 red->q_time = time;
434}
435
436#ifdef __cplusplus
437}
438#endif
439
440#endif /* __RTE_RED_H_INCLUDED__ */