/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <string.h>
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include <inttypes.h>
#include <assert.h>
#include <sys/queue.h>

#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_eal_memconfig.h>
#include <rte_per_lcore.h>
#include <rte_memory.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_spinlock.h>
#include <rte_random.h>
#include <rte_pause.h>
#include <rte_memzone.h>
#include <rte_malloc.h>
#include <rte_errno.h>

#include "rte_timer.h"

/**
 * Per-lcore info for timers.
 */
struct priv_timer {
	struct rte_timer pending_head;  /**< dummy timer instance to head up list */
	rte_spinlock_t list_lock;       /**< lock to protect list access */

	/** per-core variable that is true if a timer was updated on this
	 *  core since the last reset of the variable */
	int updated;

	/** track the current depth of the skiplist */
	unsigned curr_skiplist_depth;

	unsigned prev_lcore;            /**< used for lcore round robin */

	/** running timer on this lcore now */
	struct rte_timer *running_tim;

#ifdef RTE_LIBRTE_TIMER_DEBUG
	/** per-lcore statistics */
	struct rte_timer_debug_stats stats;
#endif
} __rte_cache_aligned;

#define FL_ALLOCATED	(1 << 0)
struct rte_timer_data {
	struct priv_timer priv_timer[RTE_MAX_LCORE];
	uint8_t internal_flags;
};

#define RTE_MAX_DATA_ELS 64
static const struct rte_memzone *rte_timer_data_mz;
static int *volatile rte_timer_mz_refcnt;
static struct rte_timer_data *rte_timer_data_arr;
static const uint32_t default_data_id;
static uint32_t rte_timer_subsystem_initialized;

/* when debug is enabled, store some statistics */
#ifdef RTE_LIBRTE_TIMER_DEBUG
#define __TIMER_STAT_ADD(priv_timer, name, n) do {			\
		unsigned __lcore_id = rte_lcore_id();			\
		if (__lcore_id < RTE_MAX_LCORE)				\
			priv_timer[__lcore_id].stats.name += (n);	\
	} while (0)
#else
#define __TIMER_STAT_ADD(priv_timer, name, n) do {} while (0)
#endif

static inline int
timer_data_valid(uint32_t id)
{
	return rte_timer_data_arr &&
		(rte_timer_data_arr[id].internal_flags & FL_ALLOCATED);
}

/* validate ID and retrieve timer data pointer, or return error value */
#define TIMER_DATA_VALID_GET_OR_ERR_RET(id, timer_data, retval) do {	\
	if (id >= RTE_MAX_DATA_ELS || !timer_data_valid(id))		\
		return retval;						\
	timer_data = &rte_timer_data_arr[id];				\
} while (0)

int
rte_timer_data_alloc(uint32_t *id_ptr)
{
	int i;
	struct rte_timer_data *data;

	if (!rte_timer_subsystem_initialized)
		return -ENOMEM;

	for (i = 0; i < RTE_MAX_DATA_ELS; i++) {
		data = &rte_timer_data_arr[i];
		if (!(data->internal_flags & FL_ALLOCATED)) {
			data->internal_flags |= FL_ALLOCATED;

			if (id_ptr)
				*id_ptr = i;

			return 0;
		}
	}

	return -ENOSPC;
}

int
rte_timer_data_dealloc(uint32_t id)
{
	struct rte_timer_data *timer_data;
	TIMER_DATA_VALID_GET_OR_ERR_RET(id, timer_data, -EINVAL);

	timer_data->internal_flags &= ~(FL_ALLOCATED);

	return 0;
}

/* Init the timer library. Allocate an array of timer data structs in shared
 * memory, and allocate the zeroth entry for use with original timer
 * APIs. Since the intersection of the sets of lcore ids in primary and
 * secondary processes should be empty, the zeroth entry can be shared by
 * multiple processes.
 */
int
rte_timer_subsystem_init(void)
{
	const struct rte_memzone *mz;
	struct rte_timer_data *data;
	int i, lcore_id;
	static const char *mz_name = "rte_timer_mz";
	const size_t data_arr_size =
			RTE_MAX_DATA_ELS * sizeof(*rte_timer_data_arr);
	const size_t mem_size = data_arr_size + sizeof(*rte_timer_mz_refcnt);
	bool do_full_init = true;

	rte_mcfg_timer_lock();

	if (rte_timer_subsystem_initialized) {
		rte_mcfg_timer_unlock();
		return -EALREADY;
	}

	mz = rte_memzone_lookup(mz_name);
	if (mz == NULL) {
		mz = rte_memzone_reserve_aligned(mz_name, mem_size,
				SOCKET_ID_ANY, 0, RTE_CACHE_LINE_SIZE);
		if (mz == NULL) {
			rte_mcfg_timer_unlock();
			return -ENOMEM;
		}
		do_full_init = true;
	} else
		do_full_init = false;

	rte_timer_data_mz = mz;
	rte_timer_data_arr = mz->addr;
	rte_timer_mz_refcnt = (void *)((char *)mz->addr + data_arr_size);

	if (do_full_init) {
		for (i = 0; i < RTE_MAX_DATA_ELS; i++) {
			data = &rte_timer_data_arr[i];

			for (lcore_id = 0; lcore_id < RTE_MAX_LCORE;
			     lcore_id++) {
				rte_spinlock_init(
					&data->priv_timer[lcore_id].list_lock);
				data->priv_timer[lcore_id].prev_lcore =
					lcore_id;
			}
		}
	}

	rte_timer_data_arr[default_data_id].internal_flags |= FL_ALLOCATED;
	(*rte_timer_mz_refcnt)++;

	rte_timer_subsystem_initialized = 1;

	rte_mcfg_timer_unlock();

	return 0;
}

void
rte_timer_subsystem_finalize(void)
{
	rte_mcfg_timer_lock();

	if (!rte_timer_subsystem_initialized) {
		rte_mcfg_timer_unlock();
		return;
	}

	if (--(*rte_timer_mz_refcnt) == 0)
		rte_memzone_free(rte_timer_data_mz);

	rte_timer_subsystem_initialized = 0;

	rte_mcfg_timer_unlock();
}

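/*
 * Illustrative sketch (not part of the upstream library, compiled out):
 * one plausible init/teardown sequence for the shared timer state above,
 * using only the public calls defined in this file. The extra timer data
 * instance is optional; most applications only use the default instance.
 */
#if 0
#include <rte_eal.h>
#include <rte_timer.h>

static int
timer_lifecycle_example(int argc, char **argv)
{
	uint32_t my_data_id;	/* hypothetical id for a private instance */

	if (rte_eal_init(argc, argv) < 0)
		return -1;

	/* reserves (or attaches to) the "rte_timer_mz" memzone */
	if (rte_timer_subsystem_init() < 0)
		return -1;

	/* optional: reserve a timer data instance beyond the default one */
	if (rte_timer_data_alloc(&my_data_id) < 0)
		return -1;

	/* ... arm timers, run rte_timer_manage()/rte_timer_alt_manage() ... */

	rte_timer_data_dealloc(my_data_id);
	rte_timer_subsystem_finalize();	/* frees the memzone on last detach */
	return 0;
}
#endif
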
/* Initialize the timer handle tim for use */
void
rte_timer_init(struct rte_timer *tim)
{
	union rte_timer_status status;

	status.state = RTE_TIMER_STOP;
	status.owner = RTE_TIMER_NO_OWNER;
	__atomic_store_n(&tim->status.u32, status.u32, __ATOMIC_RELAXED);
}

/*
 * If the timer is pending or stopped (or running on the same core as
 * us), mark the timer as configuring, and on success return the
 * previous status of the timer.
 */
static int
timer_set_config_state(struct rte_timer *tim,
		       union rte_timer_status *ret_prev_status,
		       struct priv_timer *priv_timer)
{
	union rte_timer_status prev_status, status;
	int success = 0;
	unsigned lcore_id;

	lcore_id = rte_lcore_id();

	/* wait until the timer is in a correct status before updating,
	 * and mark it as being configured */
	prev_status.u32 = __atomic_load_n(&tim->status.u32, __ATOMIC_RELAXED);

	while (success == 0) {
		/* timer is running on another core
		 * or ready to run on local core, exit
		 */
		if (prev_status.state == RTE_TIMER_RUNNING &&
		    (prev_status.owner != (uint16_t)lcore_id ||
		     tim != priv_timer[lcore_id].running_tim))
			return -1;

		/* timer is being configured on another core */
		if (prev_status.state == RTE_TIMER_CONFIG)
			return -1;

		/* here, we know that the timer is stopped or pending,
		 * mark it atomically as being configured */
		status.state = RTE_TIMER_CONFIG;
		status.owner = (int16_t)lcore_id;
		/* CONFIG states act as locked states. If the timer is in
		 * CONFIG state, the state cannot be changed by other
		 * threads, so we should use ACQUIRE here.
		 */
		success = __atomic_compare_exchange_n(&tim->status.u32,
					      &prev_status.u32,
					      status.u32, 0,
					      __ATOMIC_ACQUIRE,
					      __ATOMIC_RELAXED);
	}

	ret_prev_status->u32 = prev_status.u32;
	return 0;
}

/*
 * If the timer is pending, mark the timer as running.
 */
static int
timer_set_running_state(struct rte_timer *tim)
{
	union rte_timer_status prev_status, status;
	unsigned lcore_id = rte_lcore_id();
	int success = 0;

	/* wait until the timer is in a correct status before updating,
	 * and mark it as running */
	prev_status.u32 = __atomic_load_n(&tim->status.u32, __ATOMIC_RELAXED);

	while (success == 0) {
		/* timer is not pending anymore */
		if (prev_status.state != RTE_TIMER_PENDING)
			return -1;

		/* we know that the timer is pending at this point,
		 * mark it atomically as being running
		 */
		status.state = RTE_TIMER_RUNNING;
		status.owner = (int16_t)lcore_id;
		/* RUNNING states act as locked states. If the timer is in
		 * RUNNING state, the state cannot be changed by other
		 * threads, so we should use ACQUIRE here.
		 */
		success = __atomic_compare_exchange_n(&tim->status.u32,
					      &prev_status.u32,
					      status.u32, 0,
					      __ATOMIC_ACQUIRE,
					      __ATOMIC_RELAXED);
	}

	return 0;
}

/*
 * Return a skiplist level for a new entry.
 * This probabilistically gives a level with p=1/4 that an entry at level n
 * will also appear at level n+1.
 */
static uint32_t
timer_get_skiplist_level(unsigned curr_depth)
{
#ifdef RTE_LIBRTE_TIMER_DEBUG
	static uint32_t i, count = 0;
	static uint32_t levels[MAX_SKIPLIST_DEPTH] = {0};
#endif

	/* probability value is 1/4, i.e. all at level 0, 1 in 4 is at level 1,
	 * 1 in 16 at level 2, 1 in 64 at level 3, etc. Calculated using lowest
	 * bit position of a (pseudo)random number.
	 */
	uint32_t rand = rte_rand() & (UINT32_MAX - 1);
	uint32_t level = rand == 0 ? MAX_SKIPLIST_DEPTH : (rte_bsf32(rand)-1) / 2;

	/* limit the levels used to one above our current level, so we don't,
	 * for instance, have a level 0 and a level 7 without anything between
	 */
	if (level > curr_depth)
		level = curr_depth;
	if (level >= MAX_SKIPLIST_DEPTH)
		level = MAX_SKIPLIST_DEPTH-1;
#ifdef RTE_LIBRTE_TIMER_DEBUG
	count++;
	levels[level]++;
	if (count % 10000 == 0)
		for (i = 0; i < MAX_SKIPLIST_DEPTH; i++)
			printf("Level %u: %u\n", (unsigned)i, (unsigned)levels[i]);
#endif
	return level;
}

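/*
 * Illustrative sketch (compiled out): the level selection above maps the
 * lowest set bit b of a random value with bit 0 cleared to level (b-1)/2,
 * so P(level == 0) = 1/2 + 1/4 = 3/4 and each further level is 4x rarer,
 * i.e. P(level >= n+1 | level >= n) = 1/4. In this standalone version,
 * __builtin_ctz() stands in for rte_bsf32() and the depth cap is a
 * stand-in for MAX_SKIPLIST_DEPTH.
 */
#if 0
#include <stdint.h>

#define EXAMPLE_MAX_DEPTH 10	/* stand-in for MAX_SKIPLIST_DEPTH */

static uint32_t
example_skiplist_level(uint32_t rand_val, unsigned curr_depth)
{
	rand_val &= UINT32_MAX - 1;	/* clear bit 0, as above */

	/* rand_val == 0 would mean "deepest level"; otherwise the lowest
	 * set bit is in {1, 2, ...} and (bit - 1) / 2 gives the level:
	 * bits 1-2 -> 0, bits 3-4 -> 1, bits 5-6 -> 2, ... */
	uint32_t level = (rand_val == 0) ? EXAMPLE_MAX_DEPTH :
			(uint32_t)(__builtin_ctz(rand_val) - 1) / 2;

	if (level > curr_depth)
		level = curr_depth;
	if (level >= EXAMPLE_MAX_DEPTH)
		level = EXAMPLE_MAX_DEPTH - 1;
	return level;
}
#endif
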
/*
 * For a given time value, get the entries at each level which
 * are <= that time value.
 */
static void
timer_get_prev_entries(uint64_t time_val, unsigned tim_lcore,
		       struct rte_timer **prev, struct priv_timer *priv_timer)
{
	unsigned lvl = priv_timer[tim_lcore].curr_skiplist_depth;
	prev[lvl] = &priv_timer[tim_lcore].pending_head;
	while (lvl != 0) {
		lvl--;
		prev[lvl] = prev[lvl+1];
		while (prev[lvl]->sl_next[lvl] &&
		       prev[lvl]->sl_next[lvl]->expire <= time_val)
			prev[lvl] = prev[lvl]->sl_next[lvl];
	}
}

/*
 * Given a timer node in the skiplist, find the previous entries for it at
 * all skiplist levels.
 */
static void
timer_get_prev_entries_for_node(struct rte_timer *tim, unsigned tim_lcore,
				struct rte_timer **prev,
				struct priv_timer *priv_timer)
{
	int i;

	/* to get a specific entry in the list, look for entries just lower
	 * than the time value, and then increment on each level individually
	 * if necessary
	 */
	timer_get_prev_entries(tim->expire - 1, tim_lcore, prev, priv_timer);
	for (i = priv_timer[tim_lcore].curr_skiplist_depth - 1; i >= 0; i--) {
		while (prev[i]->sl_next[i] != NULL &&
		       prev[i]->sl_next[i] != tim &&
		       prev[i]->sl_next[i]->expire <= tim->expire)
			prev[i] = prev[i]->sl_next[i];
	}
}

/* Add a timer to the list; call with the lock held as necessary.
 * The timer must be in CONFIG state and must not already be in a list.
 */
static void
timer_add(struct rte_timer *tim, unsigned int tim_lcore,
	  struct priv_timer *priv_timer)
{
	unsigned lvl;
	struct rte_timer *prev[MAX_SKIPLIST_DEPTH+1];

	/* find where exactly this element goes in the list of elements
	 * for each depth. */
	timer_get_prev_entries(tim->expire, tim_lcore, prev, priv_timer);

	/* now assign it a new level and add at that level */
	const unsigned tim_level = timer_get_skiplist_level(
			priv_timer[tim_lcore].curr_skiplist_depth);
	if (tim_level == priv_timer[tim_lcore].curr_skiplist_depth)
		priv_timer[tim_lcore].curr_skiplist_depth++;

	lvl = tim_level;
	while (lvl > 0) {
		tim->sl_next[lvl] = prev[lvl]->sl_next[lvl];
		prev[lvl]->sl_next[lvl] = tim;
		lvl--;
	}
	tim->sl_next[0] = prev[0]->sl_next[0];
	prev[0]->sl_next[0] = tim;

	/* save the lowest list entry into the expire field of the dummy hdr
	 * NOTE: this is not atomic on 32-bit */
	priv_timer[tim_lcore].pending_head.expire = priv_timer[tim_lcore].\
			pending_head.sl_next[0]->expire;
}

/*
 * Delete a timer from the list; lock the list if needed.
 * The timer must be in CONFIG state and must be in a list.
 */
static void
timer_del(struct rte_timer *tim, union rte_timer_status prev_status,
	  int local_is_locked, struct priv_timer *priv_timer)
{
	unsigned lcore_id = rte_lcore_id();
	unsigned prev_owner = prev_status.owner;
	int i;
	struct rte_timer *prev[MAX_SKIPLIST_DEPTH+1];

	/* if the timer is pending on another core, we need to lock the
	 * list; if it is on the local core, we need to lock unless we are
	 * called from rte_timer_manage() */
	if (prev_owner != lcore_id || !local_is_locked)
		rte_spinlock_lock(&priv_timer[prev_owner].list_lock);

	/* save the lowest list entry into the expire field of the dummy hdr.
	 * NOTE: this is not atomic on 32-bit */
	if (tim == priv_timer[prev_owner].pending_head.sl_next[0])
		priv_timer[prev_owner].pending_head.expire =
			((tim->sl_next[0] == NULL) ? 0 : tim->sl_next[0]->expire);

	/* adjust pointers from previous entries to point past this */
	timer_get_prev_entries_for_node(tim, prev_owner, prev, priv_timer);
	for (i = priv_timer[prev_owner].curr_skiplist_depth - 1; i >= 0; i--) {
		if (prev[i]->sl_next[i] == tim)
			prev[i]->sl_next[i] = tim->sl_next[i];
	}

	/* in case we deleted the last entry at a level, adjust down max level */
	for (i = priv_timer[prev_owner].curr_skiplist_depth - 1; i >= 0; i--)
		if (priv_timer[prev_owner].pending_head.sl_next[i] == NULL)
			priv_timer[prev_owner].curr_skiplist_depth--;
		else
			break;

	if (prev_owner != lcore_id || !local_is_locked)
		rte_spinlock_unlock(&priv_timer[prev_owner].list_lock);
}

/* Reset and start the timer associated with the timer handle (private func) */
static int
__rte_timer_reset(struct rte_timer *tim, uint64_t expire,
		  uint64_t period, unsigned tim_lcore,
		  rte_timer_cb_t fct, void *arg,
		  int local_is_locked,
		  struct rte_timer_data *timer_data)
{
	union rte_timer_status prev_status, status;
	int ret;
	unsigned lcore_id = rte_lcore_id();
	struct priv_timer *priv_timer = timer_data->priv_timer;

	/* round robin for tim_lcore */
	if (tim_lcore == (unsigned)LCORE_ID_ANY) {
		if (lcore_id < RTE_MAX_LCORE) {
			/* EAL thread with valid lcore_id */
			tim_lcore = rte_get_next_lcore(
					priv_timer[lcore_id].prev_lcore,
					0, 1);
			priv_timer[lcore_id].prev_lcore = tim_lcore;
		} else
			/* non-EAL threads do not run rte_timer_manage(),
			 * so schedule the timer on the first enabled lcore. */
			tim_lcore = rte_get_next_lcore(LCORE_ID_ANY, 0, 1);
	}

	/* wait until the timer is in a correct status before updating,
	 * and mark it as being configured */
	ret = timer_set_config_state(tim, &prev_status, priv_timer);
	if (ret < 0)
		return -1;

	__TIMER_STAT_ADD(priv_timer, reset, 1);
	if (prev_status.state == RTE_TIMER_RUNNING &&
	    lcore_id < RTE_MAX_LCORE) {
		priv_timer[lcore_id].updated = 1;
	}

	/* remove it from list */
	if (prev_status.state == RTE_TIMER_PENDING) {
		timer_del(tim, prev_status, local_is_locked, priv_timer);
		__TIMER_STAT_ADD(priv_timer, pending, -1);
	}

	tim->period = period;
	tim->expire = expire;
	tim->f = fct;
	tim->arg = arg;

	/* if the timer needs to be scheduled on another core, we need to
	 * lock the destination list; if it is on the local core, we need to
	 * lock unless we are called from rte_timer_manage()
	 */
	if (tim_lcore != lcore_id || !local_is_locked)
		rte_spinlock_lock(&priv_timer[tim_lcore].list_lock);

	__TIMER_STAT_ADD(priv_timer, pending, 1);
	timer_add(tim, tim_lcore, priv_timer);

	/* update state: as we are in CONFIG state, only we can modify
	 * the state, so we don't need to use cmpset() here */
	status.state = RTE_TIMER_PENDING;
	status.owner = (int16_t)tim_lcore;
	/* The "RELEASE" ordering guarantees that the memory operations above
	 * the status update are observed before the update by all threads.
	 */
	__atomic_store_n(&tim->status.u32, status.u32, __ATOMIC_RELEASE);

	if (tim_lcore != lcore_id || !local_is_locked)
		rte_spinlock_unlock(&priv_timer[tim_lcore].list_lock);

	return 0;
}

/* Reset and start the timer associated with the timer handle tim */
int
rte_timer_reset(struct rte_timer *tim, uint64_t ticks,
		enum rte_timer_type type, unsigned int tim_lcore,
		rte_timer_cb_t fct, void *arg)
{
	return rte_timer_alt_reset(default_data_id, tim, ticks, type,
				   tim_lcore, fct, arg);
}

int
rte_timer_alt_reset(uint32_t timer_data_id, struct rte_timer *tim,
		    uint64_t ticks, enum rte_timer_type type,
		    unsigned int tim_lcore, rte_timer_cb_t fct, void *arg)
{
	uint64_t cur_time = rte_get_timer_cycles();
	uint64_t period;
	struct rte_timer_data *timer_data;

	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);

	if (type == PERIODICAL)
		period = ticks;
	else
		period = 0;

	return __rte_timer_reset(tim, cur_time + ticks, period, tim_lcore,
				 fct, arg, 0, timer_data);
}

/* loop until rte_timer_reset() succeeds */
void
rte_timer_reset_sync(struct rte_timer *tim, uint64_t ticks,
		     enum rte_timer_type type, unsigned tim_lcore,
		     rte_timer_cb_t fct, void *arg)
{
	while (rte_timer_reset(tim, ticks, type, tim_lcore,
			       fct, arg) != 0)
		rte_pause();
}

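/*
 * Illustrative sketch (compiled out): arming a one-shot and a periodic
 * timer against the default timer data instance. Tick counts are derived
 * from rte_get_timer_hz(); the timer names and callback are hypothetical.
 */
#if 0
#include <stdio.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_lcore.h>
#include <rte_timer.h>

static struct rte_timer oneshot_tim;	/* example handles */
static struct rte_timer periodic_tim;

static void
example_cb(struct rte_timer *tim __rte_unused, void *arg)
{
	printf("timer fired on lcore %u (%s)\n", rte_lcore_id(), (char *)arg);
}

static void
arm_example_timers(void)
{
	uint64_t hz = rte_get_timer_hz();	/* timer ticks per second */

	rte_timer_init(&oneshot_tim);
	rte_timer_init(&periodic_tim);

	/* fire once, 500 ms from now, on the calling lcore */
	rte_timer_reset_sync(&oneshot_tim, hz / 2, SINGLE,
			     rte_lcore_id(), example_cb, "one-shot");

	/* fire every second; LCORE_ID_ANY picks an lcore round-robin */
	rte_timer_reset_sync(&periodic_tim, hz, PERIODICAL,
			     LCORE_ID_ANY, example_cb, "periodic");
}
#endif
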
static int
__rte_timer_stop(struct rte_timer *tim, int local_is_locked,
		 struct rte_timer_data *timer_data)
{
	union rte_timer_status prev_status, status;
	unsigned lcore_id = rte_lcore_id();
	int ret;
	struct priv_timer *priv_timer = timer_data->priv_timer;

	/* wait until the timer is in a correct status before updating,
	 * and mark it as being configured */
	ret = timer_set_config_state(tim, &prev_status, priv_timer);
	if (ret < 0)
		return -1;

	__TIMER_STAT_ADD(priv_timer, stop, 1);
	if (prev_status.state == RTE_TIMER_RUNNING &&
	    lcore_id < RTE_MAX_LCORE) {
		priv_timer[lcore_id].updated = 1;
	}

	/* remove it from list */
	if (prev_status.state == RTE_TIMER_PENDING) {
		timer_del(tim, prev_status, local_is_locked, priv_timer);
		__TIMER_STAT_ADD(priv_timer, pending, -1);
	}

	/* mark timer as stopped */
	status.state = RTE_TIMER_STOP;
	status.owner = RTE_TIMER_NO_OWNER;
	/* The "RELEASE" ordering guarantees that the memory operations above
	 * the status update are observed before the update by all threads.
	 */
	__atomic_store_n(&tim->status.u32, status.u32, __ATOMIC_RELEASE);

	return 0;
}

/* Stop the timer associated with the timer handle tim */
int
rte_timer_stop(struct rte_timer *tim)
{
	return rte_timer_alt_stop(default_data_id, tim);
}

int
rte_timer_alt_stop(uint32_t timer_data_id, struct rte_timer *tim)
{
	struct rte_timer_data *timer_data;

	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);

	return __rte_timer_stop(tim, 0, timer_data);
}

/* loop until rte_timer_stop() succeeds */
void
rte_timer_stop_sync(struct rte_timer *tim)
{
	while (rte_timer_stop(tim) != 0)
		rte_pause();
}

/* Test the PENDING status of the timer handle tim */
int
rte_timer_pending(struct rte_timer *tim)
{
	return __atomic_load_n(&tim->status.state,
				__ATOMIC_RELAXED) == RTE_TIMER_PENDING;
}

/* must be called periodically; runs all timers that have expired */
static void
__rte_timer_manage(struct rte_timer_data *timer_data)
{
	union rte_timer_status status;
	struct rte_timer *tim, *next_tim;
	struct rte_timer *run_first_tim, **pprev;
	unsigned lcore_id = rte_lcore_id();
	struct rte_timer *prev[MAX_SKIPLIST_DEPTH + 1];
	uint64_t cur_time;
	int i, ret;
	struct priv_timer *priv_timer = timer_data->priv_timer;

	/* timer manager only runs on EAL thread with valid lcore_id */
	assert(lcore_id < RTE_MAX_LCORE);

	__TIMER_STAT_ADD(priv_timer, manage, 1);
	/* optimize for the case where per-cpu list is empty */
	if (priv_timer[lcore_id].pending_head.sl_next[0] == NULL)
		return;
	cur_time = rte_get_timer_cycles();

#ifdef RTE_ARCH_64
	/* on 64-bit the value cached in pending_head.expire will be
	 * updated atomically, so we can consult that for a quick check here
	 * outside the lock */
	if (likely(priv_timer[lcore_id].pending_head.expire > cur_time))
		return;
#endif

	/* browse ordered list, add expired timers in 'expired' list */
	rte_spinlock_lock(&priv_timer[lcore_id].list_lock);

	/* if nothing to do just unlock and return */
	if (priv_timer[lcore_id].pending_head.sl_next[0] == NULL ||
	    priv_timer[lcore_id].pending_head.sl_next[0]->expire > cur_time) {
		rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
		return;
	}

	/* save start of list of expired timers */
	tim = priv_timer[lcore_id].pending_head.sl_next[0];

	/* break the existing list at current time point */
	timer_get_prev_entries(cur_time, lcore_id, prev, priv_timer);
	for (i = priv_timer[lcore_id].curr_skiplist_depth - 1; i >= 0; i--) {
		if (prev[i] == &priv_timer[lcore_id].pending_head)
			continue;
		priv_timer[lcore_id].pending_head.sl_next[i] =
			prev[i]->sl_next[i];
		if (prev[i]->sl_next[i] == NULL)
			priv_timer[lcore_id].curr_skiplist_depth--;
		prev[i]->sl_next[i] = NULL;
	}

	/* transition run-list from PENDING to RUNNING */
	run_first_tim = tim;
	pprev = &run_first_tim;

	for ( ; tim != NULL; tim = next_tim) {
		next_tim = tim->sl_next[0];

		ret = timer_set_running_state(tim);
		if (likely(ret == 0)) {
			pprev = &tim->sl_next[0];
		} else {
			/* another core is trying to re-config this one,
			 * remove it from local expired list
			 */
			*pprev = next_tim;
		}
	}

	/* update the next to expire timer value */
	priv_timer[lcore_id].pending_head.expire =
		(priv_timer[lcore_id].pending_head.sl_next[0] == NULL) ? 0 :
			priv_timer[lcore_id].pending_head.sl_next[0]->expire;

	rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);

	/* now scan expired list and call callbacks */
	for (tim = run_first_tim; tim != NULL; tim = next_tim) {
		next_tim = tim->sl_next[0];
		priv_timer[lcore_id].updated = 0;
		priv_timer[lcore_id].running_tim = tim;

		/* execute callback function with list unlocked */
		tim->f(tim, tim->arg);

		__TIMER_STAT_ADD(priv_timer, pending, -1);
		/* the timer was stopped or reloaded by the callback
		 * function, we have nothing to do here */
		if (priv_timer[lcore_id].updated == 1)
			continue;

		if (tim->period == 0) {
			/* remove from done list and mark timer as stopped */
			status.state = RTE_TIMER_STOP;
			status.owner = RTE_TIMER_NO_OWNER;
			/* The "RELEASE" ordering guarantees that the memory
			 * operations above the status update are observed
			 * before the update by all threads.
			 */
			__atomic_store_n(&tim->status.u32, status.u32,
				__ATOMIC_RELEASE);
		} else {
			/* keep it in list and mark timer as pending */
			rte_spinlock_lock(&priv_timer[lcore_id].list_lock);
			status.state = RTE_TIMER_PENDING;
			__TIMER_STAT_ADD(priv_timer, pending, 1);
			status.owner = (int16_t)lcore_id;
			/* The "RELEASE" ordering guarantees that the memory
			 * operations above the status update are observed
			 * before the update by all threads.
			 */
			__atomic_store_n(&tim->status.u32, status.u32,
				__ATOMIC_RELEASE);
			__rte_timer_reset(tim, tim->expire + tim->period,
				tim->period, lcore_id, tim->f, tim->arg, 1,
				timer_data);
			rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
		}
	}
	priv_timer[lcore_id].running_tim = NULL;
}

int
rte_timer_manage(void)
{
	struct rte_timer_data *timer_data;

	TIMER_DATA_VALID_GET_OR_ERR_RET(default_data_id, timer_data, -EINVAL);

	__rte_timer_manage(timer_data);

	return 0;
}

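/*
 * Illustrative sketch (compiled out): the polling side of the API. Each
 * EAL worker calls rte_timer_manage() from its main loop so that timers
 * scheduled on its lcore get a chance to run; the quit flag, loop body
 * and manage period below are placeholders.
 */
#if 0
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_timer.h>

static volatile int example_quit;	/* hypothetical shutdown flag */

static int
example_lcore_main(void *arg __rte_unused)
{
	uint64_t prev_tsc = 0;
	const uint64_t manage_period = rte_get_timer_hz() / 1000; /* ~1 ms */

	while (!example_quit) {
		/* ... per-lcore packet/work processing goes here ... */

		/* rate-limit the (relatively) expensive manage call */
		uint64_t cur_tsc = rte_get_timer_cycles();
		if (cur_tsc - prev_tsc >= manage_period) {
			rte_timer_manage();
			prev_tsc = cur_tsc;
		}
	}
	return 0;
}
#endif
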
int
rte_timer_alt_manage(uint32_t timer_data_id,
		     unsigned int *poll_lcores,
		     int nb_poll_lcores,
		     rte_timer_alt_manage_cb_t f)
{
	unsigned int default_poll_lcores[] = {rte_lcore_id()};
	union rte_timer_status status;
	struct rte_timer *tim, *next_tim, **pprev;
	struct rte_timer *run_first_tims[RTE_MAX_LCORE];
	unsigned int this_lcore = rte_lcore_id();
	struct rte_timer *prev[MAX_SKIPLIST_DEPTH + 1];
	uint64_t cur_time;
	int i, j, ret;
	int nb_runlists = 0;
	struct rte_timer_data *data;
	struct priv_timer *privp;
	uint32_t poll_lcore;

	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, data, -EINVAL);

	/* timer manager only runs on EAL thread with valid lcore_id */
	assert(this_lcore < RTE_MAX_LCORE);

	__TIMER_STAT_ADD(data->priv_timer, manage, 1);

	if (poll_lcores == NULL) {
		poll_lcores = default_poll_lcores;
		nb_poll_lcores = RTE_DIM(default_poll_lcores);
	}

	for (i = 0; i < nb_poll_lcores; i++) {
		poll_lcore = poll_lcores[i];
		privp = &data->priv_timer[poll_lcore];

		/* optimize for the case where per-cpu list is empty */
		if (privp->pending_head.sl_next[0] == NULL)
			continue;
		cur_time = rte_get_timer_cycles();

#ifdef RTE_ARCH_64
		/* on 64-bit the value cached in pending_head.expire will
		 * be updated atomically, so we can consult that for a quick
		 * check here outside the lock
		 */
		if (likely(privp->pending_head.expire > cur_time))
			continue;
#endif

		/* browse ordered list, add expired timers in 'expired' list */
		rte_spinlock_lock(&privp->list_lock);

		/* if nothing to do just unlock and continue */
		if (privp->pending_head.sl_next[0] == NULL ||
		    privp->pending_head.sl_next[0]->expire > cur_time) {
			rte_spinlock_unlock(&privp->list_lock);
			continue;
		}

		/* save start of list of expired timers */
		tim = privp->pending_head.sl_next[0];

		/* break the existing list at current time point */
		timer_get_prev_entries(cur_time, poll_lcore, prev,
				       data->priv_timer);
		for (j = privp->curr_skiplist_depth - 1; j >= 0; j--) {
			if (prev[j] == &privp->pending_head)
				continue;
			privp->pending_head.sl_next[j] =
				prev[j]->sl_next[j];
			if (prev[j]->sl_next[j] == NULL)
				privp->curr_skiplist_depth--;

			prev[j]->sl_next[j] = NULL;
		}

		/* transition run-list from PENDING to RUNNING */
		run_first_tims[nb_runlists] = tim;
		pprev = &run_first_tims[nb_runlists];
		nb_runlists++;

		for ( ; tim != NULL; tim = next_tim) {
			next_tim = tim->sl_next[0];

			ret = timer_set_running_state(tim);
			if (likely(ret == 0)) {
				pprev = &tim->sl_next[0];
			} else {
				/* another core is trying to re-config this one,
				 * remove it from local expired list
				 */
				*pprev = next_tim;
			}
		}

		/* update the next to expire timer value */
		privp->pending_head.expire =
		    (privp->pending_head.sl_next[0] == NULL) ? 0 :
			privp->pending_head.sl_next[0]->expire;

		rte_spinlock_unlock(&privp->list_lock);
	}

	/* Now process the run lists */
	while (1) {
		bool done = true;
		uint64_t min_expire = UINT64_MAX;
		int min_idx = 0;

		/* Find the next oldest timer to process */
		for (i = 0; i < nb_runlists; i++) {
			tim = run_first_tims[i];

			if (tim != NULL && tim->expire < min_expire) {
				min_expire = tim->expire;
				min_idx = i;
				done = false;
			}
		}

		if (done)
			break;

		tim = run_first_tims[min_idx];

		/* Move down the runlist from which we picked a timer to
		 * execute
		 */
		run_first_tims[min_idx] = run_first_tims[min_idx]->sl_next[0];

		data->priv_timer[this_lcore].updated = 0;
		data->priv_timer[this_lcore].running_tim = tim;

		/* Call the provided callback function */
		f(tim);

		__TIMER_STAT_ADD(data->priv_timer, pending, -1);

		/* the timer was stopped or reloaded by the callback
		 * function, we have nothing to do here
		 */
		if (data->priv_timer[this_lcore].updated == 1)
			continue;

		if (tim->period == 0) {
			/* remove from done list and mark timer as stopped */
			status.state = RTE_TIMER_STOP;
			status.owner = RTE_TIMER_NO_OWNER;
			/* The "RELEASE" ordering guarantees that the memory
			 * operations above the status update are observed
			 * before the update by all threads.
			 */
			__atomic_store_n(&tim->status.u32, status.u32,
				__ATOMIC_RELEASE);
		} else {
			/* keep it in list and mark timer as pending */
			rte_spinlock_lock(
				&data->priv_timer[this_lcore].list_lock);
			status.state = RTE_TIMER_PENDING;
			__TIMER_STAT_ADD(data->priv_timer, pending, 1);
			status.owner = (int16_t)this_lcore;
			/* The "RELEASE" ordering guarantees that the memory
			 * operations above the status update are observed
			 * before the update by all threads.
			 */
			__atomic_store_n(&tim->status.u32, status.u32,
				__ATOMIC_RELEASE);
			__rte_timer_reset(tim, tim->expire + tim->period,
				tim->period, this_lcore, tim->f, tim->arg, 1,
				data);
			rte_spinlock_unlock(
				&data->priv_timer[this_lcore].list_lock);
		}

		data->priv_timer[this_lcore].running_tim = NULL;
	}

	return 0;
}

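/*
 * Illustrative sketch (compiled out): rte_timer_alt_manage() lets one
 * lcore drain the pending lists of several lcores and hands each expired
 * timer to a caller-supplied callback, which (per the call above) receives
 * only the timer pointer. The data id, lcore set and callback here are
 * example choices, not part of the library.
 */
#if 0
#include <rte_lcore.h>
#include <rte_timer.h>

static uint32_t example_timer_data_id;	/* from rte_timer_data_alloc() */

static void
example_alt_cb(struct rte_timer *tim)
{
	/* run the handler the timer was armed with */
	tim->f(tim, tim->arg);
}

static void
example_poll_all(void)
{
	unsigned int poll_lcores[RTE_MAX_LCORE];
	int n = 0;
	unsigned int lcore_id;

	/* poll the pending list of every enabled lcore */
	RTE_LCORE_FOREACH(lcore_id)
		poll_lcores[n++] = lcore_id;

	rte_timer_alt_manage(example_timer_data_id, poll_lcores, n,
			     example_alt_cb);
}
#endif
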
/* Walk pending lists, stopping timers and calling a user-specified function */
int
rte_timer_stop_all(uint32_t timer_data_id, unsigned int *walk_lcores,
		   int nb_walk_lcores,
		   rte_timer_stop_all_cb_t f, void *f_arg)
{
	int i;
	struct priv_timer *priv_timer;
	uint32_t walk_lcore;
	struct rte_timer *tim, *next_tim;
	struct rte_timer_data *timer_data;

	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);

	for (i = 0; i < nb_walk_lcores; i++) {
		walk_lcore = walk_lcores[i];
		priv_timer = &timer_data->priv_timer[walk_lcore];

		rte_spinlock_lock(&priv_timer->list_lock);

		for (tim = priv_timer->pending_head.sl_next[0];
		     tim != NULL;
		     tim = next_tim) {
			next_tim = tim->sl_next[0];

			/* Call timer_stop with lock held */
			__rte_timer_stop(tim, 1, timer_data);

			if (f)
				f(tim, f_arg);
		}

		rte_spinlock_unlock(&priv_timer->list_lock);
	}

	return 0;
}

int64_t
rte_timer_next_ticks(void)
{
	unsigned int lcore_id = rte_lcore_id();
	struct rte_timer_data *timer_data;
	struct priv_timer *priv_timer;
	const struct rte_timer *tm;
	uint64_t cur_time;
	int64_t left = -ENOENT;

	TIMER_DATA_VALID_GET_OR_ERR_RET(default_data_id, timer_data, -EINVAL);

	priv_timer = timer_data->priv_timer;
	cur_time = rte_get_timer_cycles();

	rte_spinlock_lock(&priv_timer[lcore_id].list_lock);
	tm = priv_timer[lcore_id].pending_head.sl_next[0];
	if (tm) {
		left = tm->expire - cur_time;
		if (left < 0)
			left = 0;
	}
	rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);

	return left;
}

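/*
 * Illustrative sketch (compiled out): rte_timer_next_ticks() reports how
 * many timer ticks remain until the earliest timer on the calling lcore,
 * which lets an idle loop nap instead of spinning. The sleep cap and the
 * tick-to-microsecond conversion below are arbitrary example choices.
 */
#if 0
#include <rte_cycles.h>
#include <rte_timer.h>

static void
example_idle_wait(void)
{
	int64_t ticks = rte_timer_next_ticks();
	uint64_t hz, us;

	if (ticks < 0)
		return;		/* -ENOENT: nothing pending on this lcore */

	/* convert ticks to microseconds, capping the sleep at 1 ms so that
	 * timers armed in the meantime are still noticed promptly */
	hz = rte_get_timer_hz();
	if ((uint64_t)ticks > hz / 1000)
		us = 1000;
	else
		us = (uint64_t)ticks * 1000000 / hz;
	rte_delay_us_sleep(us);

	rte_timer_manage();
}
#endif
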
/* dump statistics about timers */
static void
__rte_timer_dump_stats(struct rte_timer_data *timer_data __rte_unused, FILE *f)
{
#ifdef RTE_LIBRTE_TIMER_DEBUG
	struct rte_timer_debug_stats sum;
	unsigned lcore_id;
	struct priv_timer *priv_timer = timer_data->priv_timer;

	memset(&sum, 0, sizeof(sum));
	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
		sum.reset += priv_timer[lcore_id].stats.reset;
		sum.stop += priv_timer[lcore_id].stats.stop;
		sum.manage += priv_timer[lcore_id].stats.manage;
		sum.pending += priv_timer[lcore_id].stats.pending;
	}
	fprintf(f, "Timer statistics:\n");
	fprintf(f, "  reset = %"PRIu64"\n", sum.reset);
	fprintf(f, "  stop = %"PRIu64"\n", sum.stop);
	fprintf(f, "  manage = %"PRIu64"\n", sum.manage);
	fprintf(f, "  pending = %"PRIu64"\n", sum.pending);
#else
	fprintf(f, "No timer statistics, RTE_LIBRTE_TIMER_DEBUG is disabled\n");
#endif
}

int
rte_timer_dump_stats(FILE *f)
{
	return rte_timer_alt_dump_stats(default_data_id, f);
}

int
rte_timer_alt_dump_stats(uint32_t timer_data_id __rte_unused, FILE *f)
{
	struct rte_timer_data *timer_data;

	TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);

	__rte_timer_dump_stats(timer_data, f);

	return 0;
}