]>
git.proxmox.com Git - ceph.git/blob - ceph/src/seastar/dpdk/lib/librte_sched/rte_red.h
1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2014 Intel Corporation
5 #ifndef __RTE_RED_H_INCLUDED__
6 #define __RTE_RED_H_INCLUDED__
14 * RTE Random Early Detection (RED)
21 #include <rte_common.h>
22 #include <rte_debug.h>
23 #include <rte_cycles.h>
24 #include <rte_branch_prediction.h>
26 #define RTE_RED_SCALING 10 /**< Fraction size for fixed-point */
27 #define RTE_RED_S (1 << 22) /**< Packet size multiplied by number of leaf queues */
28 #define RTE_RED_MAX_TH_MAX 1023 /**< Max threshold limit in fixed point format */
29 #define RTE_RED_WQ_LOG2_MIN 1 /**< Min inverse filter weight value */
30 #define RTE_RED_WQ_LOG2_MAX 12 /**< Max inverse filter weight value */
31 #define RTE_RED_MAXP_INV_MIN 1 /**< Min inverse mark probability value */
32 #define RTE_RED_MAXP_INV_MAX 255 /**< Max inverse mark probability value */
33 #define RTE_RED_2POW16 (1<<16) /**< 2 power 16 */
34 #define RTE_RED_INT16_NBITS (sizeof(uint16_t) * CHAR_BIT)
35 #define RTE_RED_WQ_LOG2_NUM (RTE_RED_WQ_LOG2_MAX - RTE_RED_WQ_LOG2_MIN + 1)
41 extern uint32_t rte_red_rand_val
;
42 extern uint32_t rte_red_rand_seed
;
43 extern uint16_t rte_red_log2_1_minus_Wq
[RTE_RED_WQ_LOG2_NUM
];
44 extern uint16_t rte_red_pow2_frac_inv
[16];
47 * RED configuration parameters passed by user
50 struct rte_red_params
{
51 uint16_t min_th
; /**< Minimum threshold for queue (max_th) */
52 uint16_t max_th
; /**< Maximum threshold for queue (max_th) */
53 uint16_t maxp_inv
; /**< Inverse of packet marking probability maximum value (maxp = 1 / maxp_inv) */
54 uint16_t wq_log2
; /**< Negated log2 of queue weight (wq = 1 / (2 ^ wq_log2)) */
58 * RED configuration parameters
60 struct rte_red_config
{
61 uint32_t min_th
; /**< min_th scaled in fixed-point format */
62 uint32_t max_th
; /**< max_th scaled in fixed-point format */
63 uint32_t pa_const
; /**< Precomputed constant value used for pa calculation (scaled in fixed-point format) */
64 uint8_t maxp_inv
; /**< maxp_inv */
65 uint8_t wq_log2
; /**< wq_log2 */
72 uint32_t avg
; /**< Average queue size (avg), scaled in fixed-point format */
73 uint32_t count
; /**< Number of packets since last marked packet (count) */
74 uint64_t q_time
; /**< Start of the queue idle time (q_time) */
78 * @brief Initialises run-time data
80 * @param red [in,out] data pointer to RED runtime data
82 * @return Operation status
87 rte_red_rt_data_init(struct rte_red
*red
);
90 * @brief Configures a single RED configuration parameter structure.
92 * @param red_cfg [in,out] config pointer to a RED configuration parameter structure
93 * @param wq_log2 [in] log2 of the filter weight, valid range is:
94 * RTE_RED_WQ_LOG2_MIN <= wq_log2 <= RTE_RED_WQ_LOG2_MAX
95 * @param min_th [in] queue minimum threshold in number of packets
96 * @param max_th [in] queue maximum threshold in number of packets
97 * @param maxp_inv [in] inverse maximum mark probability
99 * @return Operation status
104 rte_red_config_init(struct rte_red_config
*red_cfg
,
105 const uint16_t wq_log2
,
106 const uint16_t min_th
,
107 const uint16_t max_th
,
108 const uint16_t maxp_inv
);
111 * @brief Generate random number for RED
113 * Implementation based on:
114 * http://software.intel.com/en-us/articles/fast-random-number-generator-on-the-intel-pentiumr-4-processor/
116 * 10 bit shift has been found through empirical tests (was 16).
118 * @return Random number between 0 and (2^22 - 1)
120 static inline uint32_t
123 rte_red_rand_seed
= (214013 * rte_red_rand_seed
) + 2531011;
124 return rte_red_rand_seed
>> 10;
128 * @brief calculate factor to scale average queue size when queue
131 * @param wq_log2 [in] where EWMA filter weight wq = 1/(2 ^ wq_log2)
132 * @param m [in] exponent in the computed value (1 - wq) ^ m
134 * @return computed value
135 * @retval ((1 - wq) ^ m) scaled in fixed-point format
137 static inline uint16_t
138 __rte_red_calc_qempty_factor(uint8_t wq_log2
, uint16_t m
)
144 * Basic math tells us that:
145 * a^b = 2^(b * log2(a) )
152 * So we are computing this equation:
153 * factor = 2 ^ ( m * log2(1-Wq))
155 * First we are computing:
158 * To avoid dealing with signed numbers log2 values are positive
159 * but they should be negative because (1-Wq) is always < 1.
160 * Contents of log2 table values are also scaled for precision.
163 n
= m
* rte_red_log2_1_minus_Wq
[wq_log2
- RTE_RED_WQ_LOG2_MIN
];
166 * The tricky part is computing 2^n, for this I split n into
167 * integer part and fraction part.
168 * f - is fraction part of n
169 * n - is integer part of original n
171 * Now using basic math we compute 2^n:
172 * 2^(f+n) = 2^f * 2^n
173 * 2^f - we use lookup table
174 * 2^n - can be replaced with bit shift right operations
180 if (n
< RTE_RED_SCALING
)
181 return (uint16_t) ((rte_red_pow2_frac_inv
[f
] + (1 << (n
- 1))) >> n
);
187 * @brief Updates queue average in condition when queue is empty
189 * Note: packet is never dropped in this particular case.
191 * @param red_cfg [in] config pointer to a RED configuration parameter structure
192 * @param red [in,out] data pointer to RED runtime data
193 * @param time [in] current time stamp
195 * @return Operation status
196 * @retval 0 enqueue the packet
197 * @retval 1 drop the packet based on max threshold criterion
198 * @retval 2 drop the packet based on mark probability criterion
201 rte_red_enqueue_empty(const struct rte_red_config
*red_cfg
,
205 uint64_t time_diff
= 0, m
= 0;
207 RTE_ASSERT(red_cfg
!= NULL
);
208 RTE_ASSERT(red
!= NULL
);
213 * We compute avg but we don't compare avg against
214 * min_th or max_th, nor calculate drop probability
216 time_diff
= time
- red
->q_time
;
219 * m is the number of packets that might have arrived while the queue was empty.
220 * In this case we have time stamps provided by scheduler in byte units (bytes
221 * transmitted on network port). Such time stamp translates into time units as
222 * port speed is fixed but such approach simplifies the code.
224 m
= time_diff
/ RTE_RED_S
;
227 * Check that m will fit into 16-bit unsigned integer
229 if (m
>= RTE_RED_2POW16
) {
232 red
->avg
= (red
->avg
>> RTE_RED_SCALING
) * __rte_red_calc_qempty_factor(red_cfg
->wq_log2
, (uint16_t) m
);
239 * Drop probability (Sally Floyd and Van Jacobson):
241 * pb = (1 / maxp_inv) * (avg - min_th) / (max_th - min_th)
242 * pa = pb / (2 - count * pb)
245 * (1 / maxp_inv) * (avg - min_th)
246 * ---------------------------------
248 * pa = -----------------------------------------------
249 * count * (1 / maxp_inv) * (avg - min_th)
250 * 2 - -----------------------------------------
255 * pa = -----------------------------------------------------------
256 * 2 * (max_th - min_th) * maxp_inv - count * (avg - min_th)
259 * We define pa_const as: pa_const = 2 * (max_th - min_th) * maxp_inv. Then:
263 * pa = -----------------------------------
264 * pa_const - count * (avg - min_th)
268 * @brief make a decision to drop or enqueue a packet based on mark probability
271 * @param red_cfg [in] config pointer to structure defining RED parameters
272 * @param red [in,out] data pointer to RED runtime data
274 * @return operation status
275 * @retval 0 enqueue the packet
276 * @retval 1 drop the packet
279 __rte_red_drop(const struct rte_red_config
*red_cfg
, struct rte_red
*red
)
281 uint32_t pa_num
= 0; /* numerator of drop-probability */
282 uint32_t pa_den
= 0; /* denominator of drop-probability */
283 uint32_t pa_num_count
= 0;
285 pa_num
= (red
->avg
- red_cfg
->min_th
) >> (red_cfg
->wq_log2
);
287 pa_num_count
= red
->count
* pa_num
;
289 if (red_cfg
->pa_const
<= pa_num_count
)
292 pa_den
= red_cfg
->pa_const
- pa_num_count
;
294 /* If drop, generate and save random number to be used next time */
295 if (unlikely((rte_red_rand_val
% pa_den
) < pa_num
)) {
296 rte_red_rand_val
= rte_fast_rand();
306 * @brief Decides if new packet should be enqeued or dropped in queue non-empty case
308 * @param red_cfg [in] config pointer to a RED configuration parameter structure
309 * @param red [in,out] data pointer to RED runtime data
310 * @param q [in] current queue size (measured in packets)
312 * @return Operation status
313 * @retval 0 enqueue the packet
314 * @retval 1 drop the packet based on max threshold criterion
315 * @retval 2 drop the packet based on mark probability criterion
318 rte_red_enqueue_nonempty(const struct rte_red_config
*red_cfg
,
322 RTE_ASSERT(red_cfg
!= NULL
);
323 RTE_ASSERT(red
!= NULL
);
326 * EWMA filter (Sally Floyd and Van Jacobson):
327 * avg = (1 - wq) * avg + wq * q
328 * avg = avg + q * wq - avg * wq
330 * We select: wq = 2^(-n). Let scaled version of avg be: avg_s = avg * 2^(N+n). We get:
331 * avg_s = avg_s + q * 2^N - avg_s * 2^(-n)
333 * By using shift left/right operations, we get:
334 * avg_s = avg_s + (q << N) - (avg_s >> n)
335 * avg_s += (q << N) - (avg_s >> n)
339 red
->avg
+= (q
<< RTE_RED_SCALING
) - (red
->avg
>> red_cfg
->wq_log2
);
341 /* avg < min_th: do not mark the packet */
342 if (red
->avg
< red_cfg
->min_th
) {
347 /* min_th <= avg < max_th: mark the packet with pa probability */
348 if (red
->avg
< red_cfg
->max_th
) {
349 if (!__rte_red_drop(red_cfg
, red
)) {
358 /* max_th <= avg: always mark the packet */
364 * @brief Decides if new packet should be enqeued or dropped
365 * Updates run time data based on new queue size value.
366 * Based on new queue average and RED configuration parameters
367 * gives verdict whether to enqueue or drop the packet.
369 * @param red_cfg [in] config pointer to a RED configuration parameter structure
370 * @param red [in,out] data pointer to RED runtime data
371 * @param q [in] updated queue size in packets
372 * @param time [in] current time stamp
374 * @return Operation status
375 * @retval 0 enqueue the packet
376 * @retval 1 drop the packet based on max threshold criteria
377 * @retval 2 drop the packet based on mark probability criteria
380 rte_red_enqueue(const struct rte_red_config
*red_cfg
,
385 RTE_ASSERT(red_cfg
!= NULL
);
386 RTE_ASSERT(red
!= NULL
);
389 return rte_red_enqueue_nonempty(red_cfg
, red
, q
);
391 return rte_red_enqueue_empty(red_cfg
, red
, time
);
396 * @brief Callback to records time that queue became empty
398 * @param red [in,out] data pointer to RED runtime data
399 * @param time [in] current time stamp
402 rte_red_mark_queue_empty(struct rte_red
*red
, const uint64_t time
)
411 #endif /* __RTE_RED_H_INCLUDED__ */