/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2019 Marvell International Ltd.
 */

#ifndef __OTX2_TIM_WORKER_H__
#define __OTX2_TIM_WORKER_H__

#include "otx2_tim_evdev.h"

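/*
 * Accessors for the 64-bit bucket control word (w1). The word packs the
 * lock count, chunk remainder, the HBT/BSK hardware status bits and the
 * number of armed entries; all cross-thread updates go through GCC
 * __atomic builtins so arm/cancel paths and the timer hardware observe
 * a consistent bucket state.
 */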
static inline uint8_t
tim_bkt_fetch_lock(uint64_t w1)
{
        return (w1 >> TIM_BUCKET_W1_S_LOCK) &
                TIM_BUCKET_W1_M_LOCK;
}

static inline int16_t
tim_bkt_fetch_rem(uint64_t w1)
{
        return (w1 >> TIM_BUCKET_W1_S_CHUNK_REMAINDER) &
                TIM_BUCKET_W1_M_CHUNK_REMAINDER;
}

static inline int16_t
tim_bkt_get_rem(struct otx2_tim_bkt *bktp)
{
        return __atomic_load_n(&bktp->chunk_remainder, __ATOMIC_ACQUIRE);
}

static inline void
tim_bkt_set_rem(struct otx2_tim_bkt *bktp, uint16_t v)
{
        __atomic_store_n(&bktp->chunk_remainder, v, __ATOMIC_RELAXED);
}

static inline void
tim_bkt_sub_rem(struct otx2_tim_bkt *bktp, uint16_t v)
{
        __atomic_fetch_sub(&bktp->chunk_remainder, v, __ATOMIC_RELAXED);
}

static inline uint8_t
tim_bkt_get_hbt(uint64_t w1)
{
        return (w1 >> TIM_BUCKET_W1_S_HBT) & TIM_BUCKET_W1_M_HBT;
}

static inline uint8_t
tim_bkt_get_bsk(uint64_t w1)
{
        return (w1 >> TIM_BUCKET_W1_S_BSK) & TIM_BUCKET_W1_M_BSK;
}

static inline uint64_t
tim_bkt_clr_bsk(struct otx2_tim_bkt *bktp)
{
        /* Clear everything except lock. */
        const uint64_t v = TIM_BUCKET_W1_M_LOCK << TIM_BUCKET_W1_S_LOCK;

        return __atomic_fetch_and(&bktp->w1, v, __ATOMIC_ACQ_REL);
}

static inline uint64_t
tim_bkt_fetch_sema_lock(struct otx2_tim_bkt *bktp)
{
        return __atomic_fetch_add(&bktp->w1, TIM_BUCKET_SEMA_WLOCK,
                                  __ATOMIC_ACQUIRE);
}

static inline uint64_t
tim_bkt_fetch_sema(struct otx2_tim_bkt *bktp)
{
        return __atomic_fetch_add(&bktp->w1, TIM_BUCKET_SEMA, __ATOMIC_RELAXED);
}

static inline uint64_t
tim_bkt_inc_lock(struct otx2_tim_bkt *bktp)
{
        const uint64_t v = 1ull << TIM_BUCKET_W1_S_LOCK;

        return __atomic_fetch_add(&bktp->w1, v, __ATOMIC_ACQUIRE);
}

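/*
 * Adding 0xff to the 8-bit lock counter wraps it around, which is
 * equivalent to decrementing it by one; the RELEASE ordering publishes
 * the bucket updates made while the lock was held.
 */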
static inline void
tim_bkt_dec_lock(struct otx2_tim_bkt *bktp)
{
        __atomic_add_fetch(&bktp->lock, 0xff, __ATOMIC_RELEASE);
}

static inline uint32_t
tim_bkt_get_nent(uint64_t w1)
{
        return (w1 >> TIM_BUCKET_W1_S_NUM_ENTRIES) &
                TIM_BUCKET_W1_M_NUM_ENTRIES;
}

static inline void
tim_bkt_inc_nent(struct otx2_tim_bkt *bktp)
{
        __atomic_add_fetch(&bktp->nb_entry, 1, __ATOMIC_RELAXED);
}

static inline void
tim_bkt_add_nent(struct otx2_tim_bkt *bktp, uint32_t v)
{
        __atomic_add_fetch(&bktp->nb_entry, v, __ATOMIC_RELAXED);
}

static inline uint64_t
tim_bkt_clr_nent(struct otx2_tim_bkt *bktp)
{
        const uint64_t v = ~(TIM_BUCKET_W1_M_NUM_ENTRIES <<
                             TIM_BUCKET_W1_S_NUM_ENTRIES);

        return __atomic_and_fetch(&bktp->w1, v, __ATOMIC_ACQ_REL);
}

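/*
 * Translate a relative expiry (rel_bkt ticks from now) into bucket
 * pointers. The current bucket index is derived from the cycles elapsed
 * since the ring started, using a precomputed reciprocal divide, and is
 * wrapped either with a modulo (OTX2_TIM_BKT_MOD) or a mask
 * (OTX2_TIM_BKT_AND) depending on whether the bucket count is a power
 * of two. The mirror bucket sits half the ring away; the arm routines
 * keep their current-chunk pointer there.
 */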
static __rte_always_inline void
tim_get_target_bucket(struct otx2_tim_ring * const tim_ring,
                      const uint32_t rel_bkt, struct otx2_tim_bkt **bkt,
                      struct otx2_tim_bkt **mirr_bkt, const uint8_t flag)
{
        const uint64_t bkt_cyc = rte_rdtsc() - tim_ring->ring_start_cyc;
        uint32_t bucket = rte_reciprocal_divide_u64(bkt_cyc,
                                                    &tim_ring->fast_div) + rel_bkt;
        uint32_t mirr_bucket = 0;

        if (flag & OTX2_TIM_BKT_MOD) {
                bucket = bucket % tim_ring->nb_bkts;
                mirr_bucket = (bucket + (tim_ring->nb_bkts >> 1)) %
                              tim_ring->nb_bkts;
        }
        if (flag & OTX2_TIM_BKT_AND) {
                bucket = bucket & (tim_ring->nb_bkts - 1);
                mirr_bucket = (bucket + (tim_ring->nb_bkts >> 1)) &
                              (tim_ring->nb_bkts - 1);
        }

        *bkt = &tim_ring->bkt[bucket];
        *mirr_bkt = &tim_ring->bkt[mirr_bucket];
}

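/*
 * Walk the bucket's chunk list starting from the chunk linked after the
 * first one and return those chunks to the mempool in batches of
 * TIM_MAX_OUTSTANDING_OBJ. The first chunk is kept for reuse and
 * returned to the caller.
 */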
static struct otx2_tim_ent *
tim_clr_bkt(struct otx2_tim_ring * const tim_ring,
            struct otx2_tim_bkt * const bkt)
{
#define TIM_MAX_OUTSTANDING_OBJ 64
        void *pend_chunks[TIM_MAX_OUTSTANDING_OBJ];
        struct otx2_tim_ent *chunk;
        struct otx2_tim_ent *pnext;
        uint8_t objs = 0;

        chunk = ((struct otx2_tim_ent *)(uintptr_t)bkt->first_chunk);
        chunk = (struct otx2_tim_ent *)(uintptr_t)(chunk +
                        tim_ring->nb_chunk_slots)->w0;
        while (chunk) {
                pnext = (struct otx2_tim_ent *)(uintptr_t)
                        ((chunk + tim_ring->nb_chunk_slots)->w0);
                if (objs == TIM_MAX_OUTSTANDING_OBJ) {
                        rte_mempool_put_bulk(tim_ring->chunk_pool, pend_chunks,
                                             objs);
                        objs = 0;
                }
                pend_chunks[objs++] = chunk;
                chunk = pnext;
        }

        if (objs)
                rte_mempool_put_bulk(tim_ring->chunk_pool, pend_chunks,
                                     objs);

        return (struct otx2_tim_ent *)(uintptr_t)bkt->first_chunk;
}

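/*
 * Chunk allocation used on the OTX2_TIM_ENA_FB path. If the bucket is
 * empty but still owns a chunk chain, the chain past the first chunk is
 * reclaimed via tim_clr_bkt() and the first chunk is reused; otherwise
 * a fresh chunk is taken from the mempool and linked after the mirror
 * bucket's current chunk (or installed as the bucket's first chunk).
 */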
static struct otx2_tim_ent *
tim_refill_chunk(struct otx2_tim_bkt * const bkt,
                 struct otx2_tim_bkt * const mirr_bkt,
                 struct otx2_tim_ring * const tim_ring)
{
        struct otx2_tim_ent *chunk;

        if (bkt->nb_entry || !bkt->first_chunk) {
                if (unlikely(rte_mempool_get(tim_ring->chunk_pool,
                                             (void **)&chunk)))
                        return NULL;
                if (bkt->nb_entry) {
                        *(uint64_t *)(((struct otx2_tim_ent *)
                                       mirr_bkt->current_chunk) +
                                      tim_ring->nb_chunk_slots) =
                                (uintptr_t)chunk;
                } else {
                        bkt->first_chunk = (uintptr_t)chunk;
                }
        } else {
                chunk = tim_clr_bkt(tim_ring, bkt);
                bkt->first_chunk = (uintptr_t)chunk;
        }
        *(uint64_t *)(chunk + tim_ring->nb_chunk_slots) = 0;

        return chunk;
}

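/*
 * Chunk allocation used on the OTX2_TIM_ENA_DFB path: always take a
 * fresh chunk from the mempool, clear its link word and attach it to
 * the bucket (either as the first chunk or after the current chunk
 * tracked in the mirror bucket).
 */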
static struct otx2_tim_ent *
tim_insert_chunk(struct otx2_tim_bkt * const bkt,
                 struct otx2_tim_bkt * const mirr_bkt,
                 struct otx2_tim_ring * const tim_ring)
{
        struct otx2_tim_ent *chunk;

        if (unlikely(rte_mempool_get(tim_ring->chunk_pool, (void **)&chunk)))
                return NULL;

        *(uint64_t *)(chunk + tim_ring->nb_chunk_slots) = 0;
        if (bkt->nb_entry) {
                *(uint64_t *)(((struct otx2_tim_ent *)(uintptr_t)
                               mirr_bkt->current_chunk) +
                              tim_ring->nb_chunk_slots) = (uintptr_t)chunk;
        } else {
                bkt->first_chunk = (uintptr_t)chunk;
        }
        return chunk;
}

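/*
 * Single-producer arm: take the bucket semaphore/lock, wait for any
 * in-progress hardware bucket traversal (HBT) to finish, then either
 * append the entry to the current chunk or allocate a new chunk when
 * the chunk remainder hits zero. impl_opaque[] in the event timer
 * records the chunk slot and the bucket so the entry can be cancelled
 * later.
 */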
static __rte_always_inline int
tim_add_entry_sp(struct otx2_tim_ring * const tim_ring,
                 const uint32_t rel_bkt,
                 struct rte_event_timer * const tim,
                 const struct otx2_tim_ent * const pent,
                 const uint8_t flags)
{
        struct otx2_tim_bkt *mirr_bkt;
        struct otx2_tim_ent *chunk;
        struct otx2_tim_bkt *bkt;
        uint64_t lock_sema;
        int16_t rem;

__retry:
        tim_get_target_bucket(tim_ring, rel_bkt, &bkt, &mirr_bkt, flags);

        /* Get bucket sema. */
        lock_sema = tim_bkt_fetch_sema_lock(bkt);

        /* Bucket related checks. */
        if (unlikely(tim_bkt_get_hbt(lock_sema))) {
                if (tim_bkt_get_nent(lock_sema) != 0) {
                        uint64_t hbt_state;
#ifdef RTE_ARCH_ARM64
                        /* Wait (wfe) until hardware clears the HBT bit
                         * (bit 33 of w1), i.e. bucket traversal is done.
                         */
                        asm volatile(
                                        "       ldaxr %[hbt], [%[w1]]   \n"
                                        "       tbz %[hbt], 33, dne%=   \n"
                                        "       sevl                    \n"
                                        "rty%=: wfe                     \n"
                                        "       ldaxr %[hbt], [%[w1]]   \n"
                                        "       tbnz %[hbt], 33, rty%=  \n"
                                        "dne%=:                         \n"
                                        : [hbt] "=&r" (hbt_state)
                                        : [w1] "r" ((&bkt->w1))
                                        : "memory"
                                        );
#else
                        do {
                                hbt_state = __atomic_load_n(&bkt->w1,
                                                            __ATOMIC_ACQUIRE);
                        } while (hbt_state & BIT_ULL(33));
#endif

                        if (!(hbt_state & BIT_ULL(34))) {
                                tim_bkt_dec_lock(bkt);
                                goto __retry;
                        }
                }
        }
        /* Insert the work. */
        rem = tim_bkt_fetch_rem(lock_sema);

        if (!rem) {
                if (flags & OTX2_TIM_ENA_FB)
                        chunk = tim_refill_chunk(bkt, mirr_bkt, tim_ring);
                if (flags & OTX2_TIM_ENA_DFB)
                        chunk = tim_insert_chunk(bkt, mirr_bkt, tim_ring);

                if (unlikely(chunk == NULL)) {
                        bkt->chunk_remainder = 0;
                        tim_bkt_dec_lock(bkt);
                        tim->impl_opaque[0] = 0;
                        tim->impl_opaque[1] = 0;
                        tim->state = RTE_EVENT_TIMER_ERROR;
                        return -ENOMEM;
                }
                mirr_bkt->current_chunk = (uintptr_t)chunk;
                bkt->chunk_remainder = tim_ring->nb_chunk_slots - 1;
        } else {
                chunk = (struct otx2_tim_ent *)mirr_bkt->current_chunk;
                chunk += tim_ring->nb_chunk_slots - rem;
        }

        /* Copy work entry. */
        *chunk = *pent;

        tim_bkt_inc_nent(bkt);
        tim_bkt_dec_lock(bkt);

        tim->impl_opaque[0] = (uintptr_t)chunk;
        tim->impl_opaque[1] = (uintptr_t)bkt;
        tim->state = RTE_EVENT_TIMER_ARMED;

        return 0;
}

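/*
 * Multi-producer arm: same flow as the single-producer path, but the
 * chunk remainder doubles as a semaphore. A negative remainder means
 * another producer is allocating the next chunk, so this thread waits
 * for it to be republished and retries; a remainder of exactly zero
 * makes this thread the sole allocator of the next chunk.
 */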
static __rte_always_inline int
tim_add_entry_mp(struct otx2_tim_ring * const tim_ring,
                 const uint32_t rel_bkt,
                 struct rte_event_timer * const tim,
                 const struct otx2_tim_ent * const pent,
                 const uint8_t flags)
{
        struct otx2_tim_bkt *mirr_bkt;
        struct otx2_tim_ent *chunk;
        struct otx2_tim_bkt *bkt;
        uint64_t lock_sema;
        int16_t rem;

__retry:
        tim_get_target_bucket(tim_ring, rel_bkt, &bkt, &mirr_bkt, flags);
        /* Get bucket sema. */
        lock_sema = tim_bkt_fetch_sema_lock(bkt);

        /* Bucket related checks. */
        if (unlikely(tim_bkt_get_hbt(lock_sema))) {
                if (tim_bkt_get_nent(lock_sema) != 0) {
                        uint64_t hbt_state;
#ifdef RTE_ARCH_ARM64
                        /* Wait (wfe) until hardware clears the HBT bit
                         * (bit 33 of w1), i.e. bucket traversal is done.
                         */
                        asm volatile(
                                        "       ldaxr %[hbt], [%[w1]]   \n"
                                        "       tbz %[hbt], 33, dne%=   \n"
                                        "       sevl                    \n"
                                        "rty%=: wfe                     \n"
                                        "       ldaxr %[hbt], [%[w1]]   \n"
                                        "       tbnz %[hbt], 33, rty%=  \n"
                                        "dne%=:                         \n"
                                        : [hbt] "=&r" (hbt_state)
                                        : [w1] "r" ((&bkt->w1))
                                        : "memory"
                                        );
#else
                        do {
                                hbt_state = __atomic_load_n(&bkt->w1,
                                                            __ATOMIC_ACQUIRE);
                        } while (hbt_state & BIT_ULL(33));
#endif

                        if (!(hbt_state & BIT_ULL(34))) {
                                tim_bkt_dec_lock(bkt);
                                goto __retry;
                        }
                }
        }

        rem = tim_bkt_fetch_rem(lock_sema);
        if (rem < 0) {
#ifdef RTE_ARCH_ARM64
                /* Wait (wfe) until the allocating producer republishes a
                 * non-negative chunk remainder.
                 */
                asm volatile(
                                "       ldaxrh %w[rem], [%[crem]]       \n"
                                "       tbz %w[rem], 15, dne%=          \n"
                                "       sevl                            \n"
                                "rty%=: wfe                             \n"
                                "       ldaxrh %w[rem], [%[crem]]       \n"
                                "       tbnz %w[rem], 15, rty%=         \n"
                                "dne%=:                                 \n"
                                : [rem] "=&r" (rem)
                                : [crem] "r" (&bkt->chunk_remainder)
                                : "memory"
                                );
#else
                while (__atomic_load_n(&bkt->chunk_remainder,
                                       __ATOMIC_ACQUIRE) < 0)
                        ;
#endif
                /* Goto diff bucket. */
                tim_bkt_dec_lock(bkt);
                goto __retry;
        } else if (!rem) {
                /* Only one thread can be here. */
                if (flags & OTX2_TIM_ENA_FB)
                        chunk = tim_refill_chunk(bkt, mirr_bkt, tim_ring);
                if (flags & OTX2_TIM_ENA_DFB)
                        chunk = tim_insert_chunk(bkt, mirr_bkt, tim_ring);

                if (unlikely(chunk == NULL)) {
                        tim_bkt_set_rem(bkt, 0);
                        tim_bkt_dec_lock(bkt);
                        tim->impl_opaque[0] = 0;
                        tim->impl_opaque[1] = 0;
                        tim->state = RTE_EVENT_TIMER_ERROR;
                        return -ENOMEM;
                }
                /* Copy work entry. */
                *chunk = *pent;
                /* Wait until the producers that raced on the exhausted
                 * chunk are accounted for before publishing the new one.
                 */
                while (tim_bkt_fetch_lock(lock_sema) !=
                       (-tim_bkt_fetch_rem(lock_sema)))
                        lock_sema = __atomic_load_n(&bkt->w1, __ATOMIC_ACQUIRE);

                mirr_bkt->current_chunk = (uintptr_t)chunk;
                __atomic_store_n(&bkt->chunk_remainder,
                                 tim_ring->nb_chunk_slots - 1, __ATOMIC_RELEASE);
        } else {
                chunk = (struct otx2_tim_ent *)mirr_bkt->current_chunk;
                chunk += tim_ring->nb_chunk_slots - rem;
                /* Copy work entry. */
                *chunk = *pent;
        }

        tim_bkt_inc_nent(bkt);
        tim_bkt_dec_lock(bkt);
        tim->impl_opaque[0] = (uintptr_t)chunk;
        tim->impl_opaque[1] = (uintptr_t)bkt;
        tim->state = RTE_EVENT_TIMER_ARMED;

        return 0;
}

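/*
 * Copy a slice of burst entries [index, cpy_lmt) into consecutive chunk
 * slots and mark each timer as armed, recording its chunk slot and
 * bucket in impl_opaque[] for later cancellation.
 */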
static inline uint16_t
tim_cpy_wrk(uint16_t index, uint16_t cpy_lmt,
            struct otx2_tim_ent *chunk,
            struct rte_event_timer ** const tim,
            const struct otx2_tim_ent * const ents,
            const struct otx2_tim_bkt * const bkt)
{
        for (; index < cpy_lmt; index++) {
                *chunk = *(ents + index);
                tim[index]->impl_opaque[0] = (uintptr_t)chunk++;
                tim[index]->impl_opaque[1] = (uintptr_t)bkt;
                tim[index]->state = RTE_EVENT_TIMER_ARMED;
        }

        return index;
}

/* Burst mode functions */
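/*
 * Burst arm: all timers in the burst target the same relative bucket.
 * The caller takes exclusive ownership of the bucket (the previous lock
 * count must be zero), fills the tail of the current chunk, allocates
 * one more chunk if needed and copies the remaining entries. On
 * chunk-allocation failure it marks the failing timer, sets rte_errno
 * to ENOMEM and bails out early.
 */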
static inline int
tim_add_entry_brst(struct otx2_tim_ring * const tim_ring,
                   const uint16_t rel_bkt,
                   struct rte_event_timer ** const tim,
                   const struct otx2_tim_ent *ents,
                   const uint16_t nb_timers, const uint8_t flags)
{
        struct otx2_tim_ent *chunk = NULL;
        struct otx2_tim_bkt *mirr_bkt;
        struct otx2_tim_bkt *bkt;
        uint16_t chunk_remainder;
        uint16_t index = 0;
        uint64_t lock_sema;
        int16_t rem, crem;
        uint8_t lock_cnt;

__retry:
        tim_get_target_bucket(tim_ring, rel_bkt, &bkt, &mirr_bkt, flags);

        /* Only one thread beyond this. */
        lock_sema = tim_bkt_inc_lock(bkt);
        lock_cnt = (uint8_t)
                ((lock_sema >> TIM_BUCKET_W1_S_LOCK) & TIM_BUCKET_W1_M_LOCK);

        if (lock_cnt) {
                tim_bkt_dec_lock(bkt);
                goto __retry;
        }

        /* Bucket related checks. */
        if (unlikely(tim_bkt_get_hbt(lock_sema))) {
                if (tim_bkt_get_nent(lock_sema) != 0) {
                        uint64_t hbt_state;
#ifdef RTE_ARCH_ARM64
                        /* Wait (wfe) until hardware clears the HBT bit
                         * (bit 33 of w1), i.e. bucket traversal is done.
                         */
                        asm volatile(
                                        "       ldaxr %[hbt], [%[w1]]   \n"
                                        "       tbz %[hbt], 33, dne%=   \n"
                                        "       sevl                    \n"
                                        "rty%=: wfe                     \n"
                                        "       ldaxr %[hbt], [%[w1]]   \n"
                                        "       tbnz %[hbt], 33, rty%=  \n"
                                        "dne%=:                         \n"
                                        : [hbt] "=&r" (hbt_state)
                                        : [w1] "r" ((&bkt->w1))
                                        : "memory"
                                        );
#else
                        do {
                                hbt_state = __atomic_load_n(&bkt->w1,
                                                            __ATOMIC_ACQUIRE);
                        } while (hbt_state & BIT_ULL(33));
#endif

                        if (!(hbt_state & BIT_ULL(34))) {
                                tim_bkt_dec_lock(bkt);
                                goto __retry;
                        }
                }
        }

        chunk_remainder = tim_bkt_fetch_rem(lock_sema);
        rem = chunk_remainder - nb_timers;
        if (rem < 0) {
                crem = tim_ring->nb_chunk_slots - chunk_remainder;
                if (chunk_remainder && crem) {
                        chunk = ((struct otx2_tim_ent *)
                                 mirr_bkt->current_chunk) + crem;

                        index = tim_cpy_wrk(index, chunk_remainder, chunk, tim,
                                            ents, bkt);
                        tim_bkt_sub_rem(bkt, chunk_remainder);
                        tim_bkt_add_nent(bkt, chunk_remainder);
                }

                if (flags & OTX2_TIM_ENA_FB)
                        chunk = tim_refill_chunk(bkt, mirr_bkt, tim_ring);
                if (flags & OTX2_TIM_ENA_DFB)
                        chunk = tim_insert_chunk(bkt, mirr_bkt, tim_ring);

                if (unlikely(chunk == NULL)) {
                        tim_bkt_dec_lock(bkt);
                        rte_errno = ENOMEM;
                        tim[index]->state = RTE_EVENT_TIMER_ERROR;
                        return crem;
                }
                *(uint64_t *)(chunk + tim_ring->nb_chunk_slots) = 0;
                mirr_bkt->current_chunk = (uintptr_t)chunk;
                tim_cpy_wrk(index, nb_timers, chunk, tim, ents, bkt);

                rem = nb_timers - chunk_remainder;
                tim_bkt_set_rem(bkt, tim_ring->nb_chunk_slots - rem);
                tim_bkt_add_nent(bkt, rem);
        } else {
                chunk = (struct otx2_tim_ent *)mirr_bkt->current_chunk;
                chunk += (tim_ring->nb_chunk_slots - chunk_remainder);

                tim_cpy_wrk(index, nb_timers, chunk, tim, ents, bkt);
                tim_bkt_sub_rem(bkt, nb_timers);
                tim_bkt_add_nent(bkt, nb_timers);
        }

        tim_bkt_dec_lock(bkt);

        return nb_timers;
}

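/*
 * Cancel an armed timer. impl_opaque[0]/[1] point at the chunk slot and
 * bucket recorded when the timer was armed; the entry is invalidated by
 * zeroing its words while the bucket lock is held. Returns -ENOENT if
 * the timer already fired or was never armed.
 */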
static int
tim_rm_entry(struct rte_event_timer *tim)
{
        struct otx2_tim_ent *entry;
        struct otx2_tim_bkt *bkt;
        uint64_t lock_sema;

        if (tim->impl_opaque[1] == 0 || tim->impl_opaque[0] == 0)
                return -ENOENT;

        entry = (struct otx2_tim_ent *)(uintptr_t)tim->impl_opaque[0];
        if (entry->wqe != tim->ev.u64) {
                tim->impl_opaque[0] = 0;
                tim->impl_opaque[1] = 0;
                return -ENOENT;
        }

        bkt = (struct otx2_tim_bkt *)(uintptr_t)tim->impl_opaque[1];
        lock_sema = tim_bkt_inc_lock(bkt);
        if (tim_bkt_get_hbt(lock_sema) || !tim_bkt_get_nent(lock_sema)) {
                tim_bkt_dec_lock(bkt);
                tim->impl_opaque[0] = 0;
                tim->impl_opaque[1] = 0;
                return -ENOENT;
        }

        entry->w0 = 0;
        entry->wqe = 0;
        tim_bkt_dec_lock(bkt);

        tim->state = RTE_EVENT_TIMER_CANCELED;
        tim->impl_opaque[0] = 0;
        tim->impl_opaque[1] = 0;

        return 0;
}

#endif /* __OTX2_TIM_WORKER_H__ */