/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <string.h>
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
#include <assert.h>
#include <sys/queue.h>

#include <rte_atomic.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_per_lcore.h>
#include <rte_memory.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_spinlock.h>
#include <rte_random.h>
#include <rte_pause.h>

#include "rte_timer.h"
LIST_HEAD(rte_timer_list, rte_timer);

struct priv_timer {
	struct rte_timer pending_head;  /**< dummy timer instance to head up list */
	rte_spinlock_t list_lock;       /**< lock to protect list access */

	/** per-core variable that is true if a timer was updated on this
	 *  core since the last reset of the variable */
	int updated;

	/** track the current depth of the skiplist */
	unsigned curr_skiplist_depth;

	unsigned prev_lcore;            /**< used for lcore round robin */

	/** running timer on this lcore now */
	struct rte_timer *running_tim;

#ifdef RTE_LIBRTE_TIMER_DEBUG
	/** per-lcore statistics */
	struct rte_timer_debug_stats stats;
#endif
} __rte_cache_aligned;

/** per-lcore private info for timers */
static struct priv_timer priv_timer[RTE_MAX_LCORE];
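/*
 * Note: each priv_timer element is __rte_cache_aligned and indexed by
 * lcore_id, so one lcore's list head, lock and skiplist depth do not
 * share a cache line with another lcore's; cross-core cache traffic
 * arises only when a timer is armed on, or reconfigured from, a remote
 * lcore.
 */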
/* when debug is enabled, store some statistics */
#ifdef RTE_LIBRTE_TIMER_DEBUG
#define __TIMER_STAT_ADD(name, n) do {				\
		unsigned __lcore_id = rte_lcore_id();		\
		if (__lcore_id < RTE_MAX_LCORE)			\
			priv_timer[__lcore_id].stats.name += (n); \
	} while (0)
#else
#define __TIMER_STAT_ADD(name, n) do {} while (0)
#endif
/* Init the timer library. */
void
rte_timer_subsystem_init(void)
{
	unsigned lcore_id;

	/* since priv_timer is static, it's zeroed by default, so only init some
	 * fields.
	 */
	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
		rte_spinlock_init(&priv_timer[lcore_id].list_lock);
		priv_timer[lcore_id].prev_lcore = lcore_id;
	}
}
/* Initialize the timer handle tim for use */
void
rte_timer_init(struct rte_timer *tim)
{
	union rte_timer_status status;

	status.state = RTE_TIMER_STOP;
	status.owner = RTE_TIMER_NO_OWNER;
	tim->status.u32 = status.u32;
}
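/*
 * Usage sketch (illustrative only, not part of the library; compiled out
 * behind the hypothetical TIMER_USAGE_EXAMPLE guard): a caller embeds
 * struct rte_timer in its own state, initializes it once with
 * rte_timer_init(), and only then arms it. The names my_state,
 * my_timeout_cb and my_state_start are hypothetical.
 */
#ifdef TIMER_USAGE_EXAMPLE
struct my_state {
	struct rte_timer alarm;
	unsigned n_fired;
};

static void
my_timeout_cb(struct rte_timer *tim, void *arg)
{
	struct my_state *st = arg;

	RTE_SET_USED(tim);
	st->n_fired++; /* runs on the lcore that calls rte_timer_manage() */
}

static void
my_state_start(struct my_state *st)
{
	st->n_fired = 0;
	rte_timer_init(&st->alarm);
	/* single-shot, one second from now, on any enabled lcore */
	rte_timer_reset_sync(&st->alarm, rte_get_timer_hz(), SINGLE,
			     LCORE_ID_ANY, my_timeout_cb, st);
}
#endif /* TIMER_USAGE_EXAMPLE */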
/*
 * if timer is pending or stopped (or running on the same core as
 * us), mark timer as configuring, and on success return the previous
 * status of the timer
 */
static int
timer_set_config_state(struct rte_timer *tim,
		       union rte_timer_status *ret_prev_status)
{
	union rte_timer_status prev_status, status;
	int success = 0;
	unsigned lcore_id;

	lcore_id = rte_lcore_id();

	/* wait that the timer is in correct status before update,
	 * and mark it as being configured */
	while (success == 0) {
		prev_status.u32 = tim->status.u32;

		/* timer is running on another core
		 * or ready to run on local core, exit
		 */
		if (prev_status.state == RTE_TIMER_RUNNING &&
		    (prev_status.owner != (uint16_t)lcore_id ||
		     tim != priv_timer[lcore_id].running_tim))
			return -1;

		/* timer is being configured on another core */
		if (prev_status.state == RTE_TIMER_CONFIG)
			return -1;

		/* here, we know that timer is stopped or pending,
		 * mark it atomically as being configured */
		status.state = RTE_TIMER_CONFIG;
		status.owner = (int16_t)lcore_id;
		success = rte_atomic32_cmpset(&tim->status.u32,
					      prev_status.u32,
					      status.u32);
	}

	ret_prev_status->u32 = prev_status.u32;
	return 0;
}
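/*
 * Note on the status word: state and owner are packed into one 32-bit
 * union rte_timer_status precisely so that each transition is a single
 * rte_atomic32_cmpset(). The transitions driven by this helper and by
 * timer_set_running_state() below are:
 *
 *   STOP or PENDING --> CONFIG  --> PENDING (reset) or STOP (stop)
 *   PENDING         --> RUNNING --> PENDING (periodic) or STOP (single)
 *
 * CONFIG and RUNNING act as short-lived locks: a core that observes
 * either state backs off (returns -1), except for the owner lcore
 * reconfiguring the timer from within its own callback.
 */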
/*
 * if timer is pending, mark timer as running
 */
static int
timer_set_running_state(struct rte_timer *tim)
{
	union rte_timer_status prev_status, status;
	unsigned lcore_id = rte_lcore_id();
	int success = 0;

	/* wait that the timer is in correct status before update,
	 * and mark it as running */
	while (success == 0) {
		prev_status.u32 = tim->status.u32;

		/* timer is not pending anymore */
		if (prev_status.state != RTE_TIMER_PENDING)
			return -1;

		/* here, we know that timer is pending,
		 * mark it atomically as running */
		status.state = RTE_TIMER_RUNNING;
		status.owner = (int16_t)lcore_id;
		success = rte_atomic32_cmpset(&tim->status.u32,
					      prev_status.u32,
					      status.u32);
	}

	return 0;
}
/*
 * Return a skiplist level for a new entry.
 * This probabilistically gives a level with p=1/4 that an entry at level n
 * will also appear at level n+1.
 */
static uint32_t
timer_get_skiplist_level(unsigned curr_depth)
{
#ifdef RTE_LIBRTE_TIMER_DEBUG
	static uint32_t i, count = 0;
	static uint32_t levels[MAX_SKIPLIST_DEPTH] = {0};
#endif

	/* probability value is 1/4, i.e. all at level 0, 1 in 4 is at level 1,
	 * 1 in 16 at level 2, 1 in 64 at level 3, etc. Calculated using lowest
	 * bit position of a (pseudo)random number.
	 */
	uint32_t rand = rte_rand() & (UINT32_MAX - 1);
	uint32_t level = rand == 0 ? MAX_SKIPLIST_DEPTH : (rte_bsf32(rand)-1) / 2;

	/* limit the levels used to one above our current level, so we don't,
	 * for instance, have a level 0 and a level 7 without anything between
	 */
	if (level > curr_depth)
		level = curr_depth;
	if (level >= MAX_SKIPLIST_DEPTH)
		level = MAX_SKIPLIST_DEPTH-1;
#ifdef RTE_LIBRTE_TIMER_DEBUG
	count++;
	levels[level]++;
	if (count % 10000 == 0)
		for (i = 0; i < MAX_SKIPLIST_DEPTH; i++)
			printf("Level %u: %u\n", (unsigned)i, (unsigned)levels[i]);
#endif
	return level;
}
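/*
 * Worked example of the arithmetic above (illustrative): with bit 0
 * masked off, the lowest set bit of rand sits at 0-based position
 * b >= 1 with probability 2^-b, and level = (b-1)/2 pairs these
 * positions up:
 *
 *   b in {1,2} -> level 0   p = 1/2 + 1/4  = 3/4
 *   b in {3,4} -> level 1   p = 1/8 + 1/16 = 3/16
 *   b in {5,6} -> level 2   p = 3/64, ...
 *
 * so each level is 1/4 as likely as the one below it, matching the
 * p=1/4 promotion probability stated in the function comment.
 */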
/*
 * For a given time value, get the entries at each level which
 * are <= that time value.
 */
static void
timer_get_prev_entries(uint64_t time_val, unsigned tim_lcore,
		struct rte_timer **prev)
{
	unsigned lvl = priv_timer[tim_lcore].curr_skiplist_depth;
	prev[lvl] = &priv_timer[tim_lcore].pending_head;
	while (lvl != 0) {
		lvl--;
		prev[lvl] = prev[lvl+1];
		while (prev[lvl]->sl_next[lvl] &&
				prev[lvl]->sl_next[lvl]->expire <= time_val)
			prev[lvl] = prev[lvl]->sl_next[lvl];
	}
}
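/*
 * Illustration (not from the original source): with depth 3 and a
 * search time t where A.expire <= t < B.expire, the walk above drops a
 * level whenever the next entry would overshoot t:
 *
 *   lvl2: head ---------> A ------------------> C
 *   lvl1: head ---------> A ----> B ----------> C
 *   lvl0: head -> a ----> A ----> B -> b -----> C
 *
 * and terminates with prev[2] = prev[1] = prev[0] = A, i.e. the last
 * entry at every level whose expire is <= t.
 */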
/*
 * Given a timer node in the skiplist, find the previous entries for it at
 * all skiplist levels.
 */
static void
timer_get_prev_entries_for_node(struct rte_timer *tim, unsigned tim_lcore,
		struct rte_timer **prev)
{
	int i;

	/* to get a specific entry in the list, look for just lower than the time
	 * values, and then increment on each level individually if necessary
	 */
	timer_get_prev_entries(tim->expire - 1, tim_lcore, prev);
	for (i = priv_timer[tim_lcore].curr_skiplist_depth - 1; i >= 0; i--) {
		while (prev[i]->sl_next[i] != NULL &&
				prev[i]->sl_next[i] != tim &&
				prev[i]->sl_next[i]->expire <= tim->expire)
			prev[i] = prev[i]->sl_next[i];
	}
}
/*
 * add in list, lock if needed
 * timer must be in config state
 * timer must not be in a list
 */
static void
timer_add(struct rte_timer *tim, unsigned tim_lcore, int local_is_locked)
{
	unsigned lcore_id = rte_lcore_id();
	unsigned lvl;
	struct rte_timer *prev[MAX_SKIPLIST_DEPTH+1];

	/* if timer needs to be scheduled on another core, we need to
	 * lock the list; if it is on local core, we need to lock if
	 * we are not called from rte_timer_manage() */
	if (tim_lcore != lcore_id || !local_is_locked)
		rte_spinlock_lock(&priv_timer[tim_lcore].list_lock);

	/* find where exactly this element goes in the list of elements
	 * for each depth. */
	timer_get_prev_entries(tim->expire, tim_lcore, prev);

	/* now assign it a new level and add at that level */
	const unsigned tim_level = timer_get_skiplist_level(
			priv_timer[tim_lcore].curr_skiplist_depth);
	if (tim_level == priv_timer[tim_lcore].curr_skiplist_depth)
		priv_timer[tim_lcore].curr_skiplist_depth++;

	lvl = tim_level;
	while (lvl > 0) {
		tim->sl_next[lvl] = prev[lvl]->sl_next[lvl];
		prev[lvl]->sl_next[lvl] = tim;
		lvl--;
	}
	tim->sl_next[0] = prev[0]->sl_next[0];
	prev[0]->sl_next[0] = tim;

	/* save the lowest list entry into the expire field of the dummy hdr
	 * NOTE: this is not atomic on 32-bit */
	priv_timer[tim_lcore].pending_head.expire =
			priv_timer[tim_lcore].pending_head.sl_next[0]->expire;

	if (tim_lcore != lcore_id || !local_is_locked)
		rte_spinlock_unlock(&priv_timer[tim_lcore].list_lock);
}
/*
 * del from list, lock if needed
 * timer must be in config state
 * timer must be in a list
 */
static void
timer_del(struct rte_timer *tim, union rte_timer_status prev_status,
		int local_is_locked)
{
	unsigned lcore_id = rte_lcore_id();
	unsigned prev_owner = prev_status.owner;
	int i;
	struct rte_timer *prev[MAX_SKIPLIST_DEPTH+1];

	/* if the timer is pending on another core, we need to lock the
	 * list; if it is on local core, we need to lock if we are not
	 * called from rte_timer_manage() */
	if (prev_owner != lcore_id || !local_is_locked)
		rte_spinlock_lock(&priv_timer[prev_owner].list_lock);

	/* save the lowest list entry into the expire field of the dummy hdr.
	 * NOTE: this is not atomic on 32-bit */
	if (tim == priv_timer[prev_owner].pending_head.sl_next[0])
		priv_timer[prev_owner].pending_head.expire =
			((tim->sl_next[0] == NULL) ? 0 : tim->sl_next[0]->expire);

	/* adjust pointers from previous entries to point past this */
	timer_get_prev_entries_for_node(tim, prev_owner, prev);
	for (i = priv_timer[prev_owner].curr_skiplist_depth - 1; i >= 0; i--) {
		if (prev[i]->sl_next[i] == tim)
			prev[i]->sl_next[i] = tim->sl_next[i];
	}

	/* in case we deleted the last entry at a level, adjust down max level */
	for (i = priv_timer[prev_owner].curr_skiplist_depth - 1; i >= 0; i--)
		if (priv_timer[prev_owner].pending_head.sl_next[i] == NULL)
			priv_timer[prev_owner].curr_skiplist_depth--;
		else
			break;

	if (prev_owner != lcore_id || !local_is_locked)
		rte_spinlock_unlock(&priv_timer[prev_owner].list_lock);
}
/* Reset and start the timer associated with the timer handle (private func) */
static int
__rte_timer_reset(struct rte_timer *tim, uint64_t expire,
		  uint64_t period, unsigned tim_lcore,
		  rte_timer_cb_t fct, void *arg,
		  int local_is_locked)
{
	union rte_timer_status prev_status, status;
	int ret;
	unsigned lcore_id = rte_lcore_id();

	/* round robin for tim_lcore */
	if (tim_lcore == (unsigned)LCORE_ID_ANY) {
		if (lcore_id < RTE_MAX_LCORE) {
			/* EAL thread with valid lcore_id */
			tim_lcore = rte_get_next_lcore(
				priv_timer[lcore_id].prev_lcore,
				0, 1);
			priv_timer[lcore_id].prev_lcore = tim_lcore;
		} else
			/* non-EAL threads do not run rte_timer_manage(),
			 * so schedule the timer on the first enabled lcore. */
			tim_lcore = rte_get_next_lcore(LCORE_ID_ANY, 0, 1);
	}

	/* wait that the timer is in correct status before update,
	 * and mark it as being configured */
	ret = timer_set_config_state(tim, &prev_status);
	if (ret < 0)
		return -1;

	__TIMER_STAT_ADD(reset, 1);
	if (prev_status.state == RTE_TIMER_RUNNING &&
	    lcore_id < RTE_MAX_LCORE) {
		priv_timer[lcore_id].updated = 1;
	}

	/* remove it from list */
	if (prev_status.state == RTE_TIMER_PENDING) {
		timer_del(tim, prev_status, local_is_locked);
		__TIMER_STAT_ADD(pending, -1);
	}

	tim->period = period;
	tim->expire = expire;
	tim->f = fct;
	tim->arg = arg;

	__TIMER_STAT_ADD(pending, 1);
	timer_add(tim, tim_lcore, local_is_locked);

	/* update state: as we are in CONFIG state, only we can modify
	 * the state so we don't need to use cmpset() here */
	rte_wmb();
	status.state = RTE_TIMER_PENDING;
	status.owner = (int16_t)tim_lcore;
	tim->status.u32 = status.u32;

	return 0;
}
/* Reset and start the timer associated with the timer handle tim */
int
rte_timer_reset(struct rte_timer *tim, uint64_t ticks,
		enum rte_timer_type type, unsigned tim_lcore,
		rte_timer_cb_t fct, void *arg)
{
	uint64_t cur_time = rte_get_timer_cycles();
	uint64_t period;

	if (unlikely((tim_lcore != (unsigned)LCORE_ID_ANY) &&
			!(rte_lcore_is_enabled(tim_lcore) ||
			rte_lcore_has_role(tim_lcore, ROLE_SERVICE))))
		return -1;

	if (type == PERIODICAL)
		period = ticks;
	else
		period = 0;

	return __rte_timer_reset(tim, cur_time + ticks, period, tim_lcore,
			fct, arg, 0);
}
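/*
 * Sketch (illustrative only, hypothetical helper behind the hypothetical
 * TIMER_USAGE_EXAMPLE guard): ticks are expressed in timer cycles, so
 * callers typically scale from rte_get_timer_hz(). This arms tim to fire
 * every `ms` milliseconds on the calling lcore.
 */
#ifdef TIMER_USAGE_EXAMPLE
static int
arm_every_ms(struct rte_timer *tim, uint64_t ms,
	     rte_timer_cb_t cb, void *arg)
{
	uint64_t ticks = ms * rte_get_timer_hz() / 1000;

	return rte_timer_reset(tim, ticks, PERIODICAL, rte_lcore_id(),
			       cb, arg);
}
#endif /* TIMER_USAGE_EXAMPLE */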
/* loop until rte_timer_reset() succeeds */
void
rte_timer_reset_sync(struct rte_timer *tim, uint64_t ticks,
		enum rte_timer_type type, unsigned tim_lcore,
		rte_timer_cb_t fct, void *arg)
{
	while (rte_timer_reset(tim, ticks, type, tim_lcore,
			fct, arg) != 0)
		rte_pause();
}
/* Stop the timer associated with the timer handle tim */
int
rte_timer_stop(struct rte_timer *tim)
{
	union rte_timer_status prev_status, status;
	unsigned lcore_id = rte_lcore_id();
	int ret;

	/* wait that the timer is in correct status before update,
	 * and mark it as being configured */
	ret = timer_set_config_state(tim, &prev_status);
	if (ret < 0)
		return -1;

	__TIMER_STAT_ADD(stop, 1);
	if (prev_status.state == RTE_TIMER_RUNNING &&
	    lcore_id < RTE_MAX_LCORE) {
		priv_timer[lcore_id].updated = 1;
	}

	/* remove it from list */
	if (prev_status.state == RTE_TIMER_PENDING) {
		timer_del(tim, prev_status, 0);
		__TIMER_STAT_ADD(pending, -1);
	}

	/* mark timer as stopped */
	rte_wmb();
	status.state = RTE_TIMER_STOP;
	status.owner = RTE_TIMER_NO_OWNER;
	tim->status.u32 = status.u32;

	return 0;
}
/* loop until rte_timer_stop() succeeds */
void
rte_timer_stop_sync(struct rte_timer *tim)
{
	while (rte_timer_stop(tim) != 0)
		rte_pause();
}
/* Test the PENDING status of the timer handle tim */
int
rte_timer_pending(struct rte_timer *tim)
{
	return tim->status.state == RTE_TIMER_PENDING;
}
/* must be called periodically, run all timers that expired */
void rte_timer_manage(void)
{
	union rte_timer_status status;
	struct rte_timer *tim, *next_tim;
	struct rte_timer *run_first_tim, **pprev;
	unsigned lcore_id = rte_lcore_id();
	struct rte_timer *prev[MAX_SKIPLIST_DEPTH + 1];
	uint64_t cur_time;
	int i, ret;

	/* timer manager only runs on an EAL thread with valid lcore_id */
	assert(lcore_id < RTE_MAX_LCORE);

	__TIMER_STAT_ADD(manage, 1);
	/* optimize for the case where per-cpu list is empty */
	if (priv_timer[lcore_id].pending_head.sl_next[0] == NULL)
		return;
	cur_time = rte_get_timer_cycles();

#ifdef RTE_ARCH_64
	/* on 64-bit the value cached in the pending_head.expire will be
	 * updated atomically, so we can consult that for a quick check here
	 * outside the lock */
	if (likely(priv_timer[lcore_id].pending_head.expire > cur_time))
		return;
#endif

	/* browse ordered list, add expired timers in 'expired' list */
	rte_spinlock_lock(&priv_timer[lcore_id].list_lock);

	/* if nothing to do just unlock and return */
	if (priv_timer[lcore_id].pending_head.sl_next[0] == NULL ||
	    priv_timer[lcore_id].pending_head.sl_next[0]->expire > cur_time) {
		rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
		return;
	}

	/* save start of list of expired timers */
	tim = priv_timer[lcore_id].pending_head.sl_next[0];

	/* break the existing list at current time point */
	timer_get_prev_entries(cur_time, lcore_id, prev);
	for (i = priv_timer[lcore_id].curr_skiplist_depth - 1; i >= 0; i--) {
		if (prev[i] == &priv_timer[lcore_id].pending_head)
			continue;
		priv_timer[lcore_id].pending_head.sl_next[i] =
			prev[i]->sl_next[i];
		if (prev[i]->sl_next[i] == NULL)
			priv_timer[lcore_id].curr_skiplist_depth--;
		prev[i]->sl_next[i] = NULL;
	}

	/* transition run-list from PENDING to RUNNING */
	run_first_tim = tim;
	pprev = &run_first_tim;

	for ( ; tim != NULL; tim = next_tim) {
		next_tim = tim->sl_next[0];

		ret = timer_set_running_state(tim);
		if (likely(ret == 0)) {
			pprev = &tim->sl_next[0];
		} else {
			/* another core is trying to re-config this one,
			 * remove it from local expired list
			 */
			*pprev = next_tim;
		}
	}

	/* update the next to expire timer value */
	priv_timer[lcore_id].pending_head.expire =
		(priv_timer[lcore_id].pending_head.sl_next[0] == NULL) ? 0 :
			priv_timer[lcore_id].pending_head.sl_next[0]->expire;

	rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);

	/* now scan expired list and call callbacks */
	for (tim = run_first_tim; tim != NULL; tim = next_tim) {
		next_tim = tim->sl_next[0];
		priv_timer[lcore_id].updated = 0;
		priv_timer[lcore_id].running_tim = tim;

		/* execute callback function with list unlocked */
		tim->f(tim, tim->arg);

		__TIMER_STAT_ADD(pending, -1);
		/* the timer was stopped or reloaded by the callback
		 * function, we have nothing to do here */
		if (priv_timer[lcore_id].updated == 1)
			continue;

		if (tim->period == 0) {
			/* remove from done list and mark timer as stopped */
			status.state = RTE_TIMER_STOP;
			status.owner = RTE_TIMER_NO_OWNER;
			rte_wmb();
			tim->status.u32 = status.u32;
		} else {
			/* keep it in list and mark timer as pending */
			rte_spinlock_lock(&priv_timer[lcore_id].list_lock);
			status.state = RTE_TIMER_PENDING;
			__TIMER_STAT_ADD(pending, 1);
			status.owner = (int16_t)lcore_id;
			rte_wmb();
			tim->status.u32 = status.u32;
			__rte_timer_reset(tim, tim->expire + tim->period,
				tim->period, lcore_id, tim->f, tim->arg, 1);
			rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
		}
	}
	priv_timer[lcore_id].running_tim = NULL;
}
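/*
 * Sketch of a typical caller (illustrative only, behind the hypothetical
 * TIMER_USAGE_EXAMPLE guard): an EAL worker interleaves its datapath
 * work with rte_timer_manage(), throttled so the skiplist is only
 * scanned every ~10 ms. worker_main is a hypothetical lcore function.
 */
#ifdef TIMER_USAGE_EXAMPLE
static int
worker_main(void *arg)
{
	const uint64_t resolution = rte_get_timer_hz() / 100; /* ~10 ms */
	uint64_t prev_tsc = 0, cur_tsc;

	RTE_SET_USED(arg);
	for (;;) {
		/* ... per-iteration datapath work goes here ... */

		cur_tsc = rte_get_timer_cycles();
		if (cur_tsc - prev_tsc > resolution) {
			rte_timer_manage();
			prev_tsc = cur_tsc;
		}
	}
	return 0;
}
#endif /* TIMER_USAGE_EXAMPLE */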
/* dump statistics about timers */
void rte_timer_dump_stats(FILE *f)
{
#ifdef RTE_LIBRTE_TIMER_DEBUG
	struct rte_timer_debug_stats sum;
	unsigned lcore_id;

	memset(&sum, 0, sizeof(sum));
	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
		sum.reset += priv_timer[lcore_id].stats.reset;
		sum.stop += priv_timer[lcore_id].stats.stop;
		sum.manage += priv_timer[lcore_id].stats.manage;
		sum.pending += priv_timer[lcore_id].stats.pending;
	}
	fprintf(f, "Timer statistics:\n");
	fprintf(f, "  reset = %"PRIu64"\n", sum.reset);
	fprintf(f, "  stop = %"PRIu64"\n", sum.stop);
	fprintf(f, "  manage = %"PRIu64"\n", sum.manage);
	fprintf(f, "  pending = %"PRIu64"\n", sum.pending);
#else
	fprintf(f, "No timer statistics, RTE_LIBRTE_TIMER_DEBUG is disabled\n");
#endif
}