[mirror_ubuntu-bionic-kernel.git] / lib / percpu-refcount.c

/*
 * Message format wrapper for this file: prefixes each format string with
 * "<calling function>: " (via __func__) and appends a trailing newline.
 */
#define pr_fmt(fmt) "%s: " fmt "\n", __func__

#include <linux/kernel.h>
#include <linux/percpu-refcount.h>

/*
 * Initially, a percpu refcount is just a set of percpu counters. In this mode
 * we don't try to detect the ref hitting 0 - which means that get/put can just
 * increment or decrement the local counter. Note that the counter on a
 * particular cpu can (and will) wrap - this is fine: when we go to shut down,
 * the percpu counters will all sum to the correct value.
 *
 * (More precisely: because modular arithmetic is commutative the sum of all the
 * pcpu_count vars will be equal to what it would have been if all the gets and
 * puts were done to a single integer, even if some of the percpu integers
 * overflow or underflow).
 *
 * The real trick to implementing percpu refcounts is shutdown. We can't detect
 * the ref hitting 0 on every put - this would require global synchronization
 * and defeat the whole purpose of using percpu refs.
 *
 * What we do is require the user to keep track of the initial refcount; we know
 * the ref can't hit 0 before the user drops the initial ref, so as long as we
 * convert to non percpu mode before the initial ref is dropped everything
 * works.
 *
 * Converting to non percpu mode is done with some RCUish stuff in
 * percpu_ref_kill. Additionally, we need a bias value so that the
 * atomic_long_t can't hit 0 before we've added up all the percpu refs.
 */

/*
 * Bias held in ref->count while the per-cpu counters are in use: the top bit
 * of an unsigned long.  It is added by percpu_ref_init() and
 * percpu_ref_reinit() and subtracted again in percpu_ref_kill_rcu(), in the
 * same atomic add that folds in the summed per-cpu counts.
 */
#define PCPU_COUNT_BIAS		(1LU << (BITS_PER_LONG - 1))

/*
 * Return the per-cpu counter pointer stored in @ref->pcpu_count_ptr with the
 * PCPU_REF_DEAD flag bit masked off.  The result is NULL when nothing but
 * the flag (or nothing at all) is stored there.
 */
static unsigned long __percpu *pcpu_count_ptr(struct percpu_ref *ref)
{
	unsigned long raw = ref->pcpu_count_ptr;

	raw &= ~PCPU_REF_DEAD;
	return (unsigned long __percpu *)raw;
}

/**
 * percpu_ref_init - initialize a percpu refcount
 * @ref: percpu_ref to initialize
 * @release: function which will be called when refcount hits 0
 * @gfp: allocation mask to use
 *
 * Sets @ref->count to 1 + PCPU_COUNT_BIAS (the caller's initial reference
 * plus the bias) and allocates the per-cpu counters using @gfp.  The
 * allocated pointer is stored as-is, i.e. without PCPU_REF_DEAD being
 * OR-ed in.
 *
 * Note that @release must not sleep - it may potentially be called from RCU
 * callback context by percpu_ref_kill().
 *
 * Return: 0 on success, -ENOMEM if the per-cpu counters could not be
 * allocated.  On failure @ref->release is not assigned.
 */
int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release,
		    gfp_t gfp)
{
	unsigned long __percpu *counters;

	/* One reference for the caller, plus the bias. */
	atomic_long_set(&ref->count, 1 + PCPU_COUNT_BIAS);

	counters = alloc_percpu_gfp(unsigned long, gfp);
	/* Stored unconditionally, so a failed allocation leaves 0 here. */
	ref->pcpu_count_ptr = (unsigned long)counters;
	if (!counters)
		return -ENOMEM;

	ref->release = release;
	return 0;
}
EXPORT_SYMBOL_GPL(percpu_ref_init);

/**
 * percpu_ref_reinit - re-initialize a percpu refcount
 * @ref: percpu_ref to re-initialize
 *
 * Re-initialize @ref so that it's in the same state as when it finished
 * percpu_ref_init().  @ref must have been initialized successfully, killed
 * and reached 0 but not exited.
 *
 * Note that percpu_ref_tryget[_live]() are safe to perform on @ref while
 * this function is in progress.
 */
void percpu_ref_reinit(struct percpu_ref *ref)
{
	unsigned long __percpu *pcpu_count = pcpu_count_ptr(ref);
	int cpu;

	/*
	 * A NULL counter pointer means there is nothing to reuse, e.g.
	 * percpu_ref_init() failed or percpu_ref_exit() already ran.
	 */
	BUG_ON(!pcpu_count);
	/* Complain if percpu_ref_is_zero() says @ref has not reached zero. */
	WARN_ON(!percpu_ref_is_zero(ref));

	/* Same starting value as percpu_ref_init(): one ref plus the bias. */
	atomic_long_set(&ref->count, 1 + PCPU_COUNT_BIAS);

	/*
	 * Restore per-cpu operation: zero every possible CPU's counter, then
	 * clear PCPU_REF_DEAD.  smp_store_release() is paired with
	 * smp_read_barrier_depends() in __pcpu_ref_alive() and guarantees
	 * that the zeroing is visible to all percpu accesses which can see
	 * the following PCPU_REF_DEAD clearing.
	 */
	for_each_possible_cpu(cpu)
		*per_cpu_ptr(pcpu_count, cpu) = 0;

	smp_store_release(&ref->pcpu_count_ptr,
			  ref->pcpu_count_ptr & ~PCPU_REF_DEAD);
}
EXPORT_SYMBOL_GPL(percpu_ref_reinit);

/**
 * percpu_ref_exit - undo percpu_ref_init()
 * @ref: percpu_ref to exit
 *
 * This function exits @ref.  The caller is responsible for ensuring that
 * @ref is no longer in active use.  The usual places to invoke this
 * function from are the @ref->release() callback or in init failure path
 * where percpu_ref_init() succeeded but other parts of the initialization
 * of the embedding object failed.
 */
void percpu_ref_exit(struct percpu_ref *ref)
{
	unsigned long __percpu *pcpu_count = pcpu_count_ptr(ref);

	if (pcpu_count) {
		free_percpu(pcpu_count);
		ref->pcpu_count_ptr = PCPU_REF_DEAD;
	}
}
EXPORT_SYMBOL_GPL(percpu_ref_exit);

/*
 * RCU callback queued by percpu_ref_kill_and_confirm() via call_rcu_sched().
 *
 * Recovers the percpu_ref embedding @rcu, sums the counters of every
 * possible CPU, folds that sum minus PCPU_COUNT_BIAS into ref->count with a
 * single atomic add, invokes the optional ref->confirm_kill callback and
 * finally drops the initial reference with percpu_ref_put().
 */
static void percpu_ref_kill_rcu(struct rcu_head *rcu)
{
	struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu);
	unsigned long __percpu *pcpu_count = pcpu_count_ptr(ref);
	unsigned long count = 0;
	int cpu;

	/* Unsigned accumulation: individual counters may have wrapped. */
	for_each_possible_cpu(cpu)
		count += *per_cpu_ptr(pcpu_count, cpu);

	pr_debug("global %ld pcpu %ld",
		 atomic_long_read(&ref->count), (long)count);

	/*
	 * It's crucial that we sum the percpu counters _before_ adding the sum
	 * to &ref->count; since gets could be happening on one cpu while puts
	 * happen on another, adding a single cpu's count could cause
	 * @ref->count to hit 0 before we've got a consistent value - but the
	 * sum of all the counts will be consistent and correct.
	 *
	 * Subtracting the bias value then has to happen _after_ adding count to
	 * &ref->count; we need the bias value to prevent &ref->count from
	 * reaching 0 before we add the percpu counts. But doing it at the same
	 * time is equivalent and saves us atomic operations:
	 */

	atomic_long_add((long)count - PCPU_COUNT_BIAS, &ref->count);

	/* The initial ref has not been dropped yet, so the count must be > 0. */
	WARN_ONCE(atomic_long_read(&ref->count) <= 0,
		  "percpu ref (%pf) <= 0 (%ld) after killed",
		  ref->release, atomic_long_read(&ref->count));

	/* @ref is viewed as dead on all CPUs, send out kill confirmation */
	if (ref->confirm_kill)
		ref->confirm_kill(ref);

	/*
	 * Now we're in single atomic_long_t mode with a consistent refcount,
	 * so it's safe to drop our initial ref:
	 */
	percpu_ref_put(ref);
}

/**
 * percpu_ref_kill_and_confirm - drop the initial ref and schedule confirmation
 * @ref: percpu_ref to kill
 * @confirm_kill: optional confirmation callback
 *
 * Equivalent to percpu_ref_kill() but also schedules kill confirmation if
 * @confirm_kill is not NULL.  @confirm_kill, which may not block, will be
 * called after @ref is seen as dead from all CPUs - all further
 * invocations of percpu_ref_tryget() will fail.  See percpu_ref_tryget()
 * for more details.
 *
 * Due to the way percpu_ref is implemented, @confirm_kill will be called
 * after at least one full RCU grace period has passed but this is an
 * implementation detail and callers must not depend on it.
 */
void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
				 percpu_ref_func_t *confirm_kill)
{
	/*
	 * A second kill is only reported here; the function does not return
	 * early, so the steps below still run.
	 */
	WARN_ONCE(ref->pcpu_count_ptr & PCPU_REF_DEAD,
		  "percpu_ref_kill() called more than once on %pf!",
		  ref->release);

	/* Flag @ref as dead; the counter pointer bits are left intact. */
	ref->pcpu_count_ptr |= PCPU_REF_DEAD;
	/* Stashed for percpu_ref_kill_rcu(), which calls it if non-NULL. */
	ref->confirm_kill = confirm_kill;

	/* The remaining kill work is done in the percpu_ref_kill_rcu() callback. */
	call_rcu_sched(&ref->rcu, percpu_ref_kill_rcu);
}
EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm);
Commit	Line	Data
215e262f KO	1	#define pr_fmt(fmt) "%s: " fmt "\n", __func__
	2
	3	#include <linux/kernel.h>
	4	#include <linux/percpu-refcount.h>
	5
	6	/*
	7	* Initially, a percpu refcount is just a set of percpu counters. Initially, we
	8	* don't try to detect the ref hitting 0 - which means that get/put can just
	9	* increment or decrement the local counter. Note that the counter on a
	10	* particular cpu can (and will) wrap - this is fine, when we go to shutdown the
	11	* percpu counters will all sum to the correct value
	12	*
	13	* (More precisely: because moduler arithmatic is commutative the sum of all the
	14	* pcpu_count vars will be equal to what it would have been if all the gets and
	15	* puts were done to a single integer, even if some of the percpu integers
	16	* overflow or underflow).
	17	*
	18	* The real trick to implementing percpu refcounts is shutdown. We can't detect
	19	* the ref hitting 0 on every put - this would require global synchronization
	20	* and defeat the whole purpose of using percpu refs.
	21	*
	22	* What we do is require the user to keep track of the initial refcount; we know
	23	* the ref can't hit 0 before the user drops the initial ref, so as long as we
	24	* convert to non percpu mode before the initial ref is dropped everything
	25	* works.
	26	*
	27	* Converting to non percpu mode is done with some RCUish stuff in
e625305b TH	28	* percpu_ref_kill. Additionally, we need a bias value so that the
e625305b TH	29	* atomic_long_t can't hit 0 before we've added up all the percpu refs.
215e262f KO	30	*/
215e262f KO	31
e625305b	32	#define PCPU_COUNT_BIAS (1LU << (BITS_PER_LONG - 1))
215e262f	33
e625305b	34	static unsigned long __percpu pcpu_count_ptr(struct percpu_ref ref)
eae7975d	35	{
e625305b	36	return (unsigned long __percpu *)(ref->pcpu_count_ptr & ~PCPU_REF_DEAD);
eae7975d TH	37	}
eae7975d TH	38
215e262f KO	39	/**
215e262f KO	40	* percpu_ref_init - initialize a percpu refcount
ac899061 TH	41	* @ref: percpu_ref to initialize
ac899061 TH	42	* @release: function which will be called when refcount hits 0
a34375ef	43	* @gfp: allocation mask to use
215e262f KO	44	*
215e262f KO	45	* Initializes the refcount in single atomic counter mode with a refcount of 1;
e625305b	46	* analagous to atomic_long_set(ref, 1).
215e262f KO	47	*
	48	* Note that @release must not sleep - it may potentially be called from RCU
	49	* callback context by percpu_ref_kill().
	50	*/
a34375ef TH	51	int percpu_ref_init(struct percpu_ref ref, percpu_ref_func_t release,
a34375ef TH	52	gfp_t gfp)
215e262f	53	{
e625305b	54	atomic_long_set(&ref->count, 1 + PCPU_COUNT_BIAS);
215e262f	55
e625305b	56	ref->pcpu_count_ptr = (unsigned long)alloc_percpu_gfp(unsigned long, gfp);
7d742075	57	if (!ref->pcpu_count_ptr)
215e262f KO	58	return -ENOMEM;
	59
	60	ref->release = release;
	61	return 0;
	62	}
5e9dd373	63	EXPORT_SYMBOL_GPL(percpu_ref_init);
215e262f	64
2d722782 TH	65	/**
	66	* percpu_ref_reinit - re-initialize a percpu refcount
	67	* @ref: perpcu_ref to re-initialize
	68	*
	69	* Re-initialize @ref so that it's in the same state as when it finished
	70	* percpu_ref_init(). @ref must have been initialized successfully, killed
	71	* and reached 0 but not exited.
	72	*
	73	* Note that percpu_ref_tryget[_live]() are safe to perform on @ref while
	74	* this function is in progress.
	75	*/
	76	void percpu_ref_reinit(struct percpu_ref *ref)
	77	{
e625305b	78	unsigned long __percpu *pcpu_count = pcpu_count_ptr(ref);
2d722782 TH	79	int cpu;
	80
	81	BUG_ON(!pcpu_count);
	82	WARN_ON(!percpu_ref_is_zero(ref));
	83
e625305b	84	atomic_long_set(&ref->count, 1 + PCPU_COUNT_BIAS);
2d722782 TH	85
	86	/*
	87	* Restore per-cpu operation. smp_store_release() is paired with
	88	* smp_read_barrier_depends() in __pcpu_ref_alive() and guarantees
	89	* that the zeroing is visible to all percpu accesses which can see
	90	* the following PCPU_REF_DEAD clearing.
	91	*/
	92	for_each_possible_cpu(cpu)
	93	*per_cpu_ptr(pcpu_count, cpu) = 0;
	94
	95	smp_store_release(&ref->pcpu_count_ptr,
	96	ref->pcpu_count_ptr & ~PCPU_REF_DEAD);
	97	}
	98	EXPORT_SYMBOL_GPL(percpu_ref_reinit);
	99
bc497bd3	100	/**
9a1049da TH	101	* percpu_ref_exit - undo percpu_ref_init()
9a1049da TH	102	* @ref: percpu_ref to exit
bc497bd3	103	*
9a1049da TH	104	* This function exits @ref. The caller is responsible for ensuring that
	105	* @ref is no longer in active use. The usual places to invoke this
	106	* function from are the @ref->release() callback or in init failure path
	107	* where percpu_ref_init() succeeded but other parts of the initialization
	108	* of the embedding object failed.
bc497bd3	109	*/
9a1049da	110	void percpu_ref_exit(struct percpu_ref *ref)
bc497bd3	111	{
e625305b	112	unsigned long __percpu *pcpu_count = pcpu_count_ptr(ref);
bc497bd3 TH	113
bc497bd3 TH	114	if (pcpu_count) {
eae7975d	115	free_percpu(pcpu_count);
9a1049da	116	ref->pcpu_count_ptr = PCPU_REF_DEAD;
bc497bd3 TH	117	}
bc497bd3 TH	118	}
9a1049da	119	EXPORT_SYMBOL_GPL(percpu_ref_exit);
bc497bd3	120
215e262f KO	121	static void percpu_ref_kill_rcu(struct rcu_head *rcu)
	122	{
	123	struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu);
e625305b TH	124	unsigned long __percpu *pcpu_count = pcpu_count_ptr(ref);
e625305b TH	125	unsigned long count = 0;
215e262f KO	126	int cpu;
215e262f KO	127
215e262f KO	128	for_each_possible_cpu(cpu)
	129	count += *per_cpu_ptr(pcpu_count, cpu);
	130
e625305b TH	131	pr_debug("global %ld pcpu %ld",
e625305b TH	132	atomic_long_read(&ref->count), (long)count);
215e262f KO	133
	134	/*
	135	* It's crucial that we sum the percpu counters _before_ adding the sum
	136	* to &ref->count; since gets could be happening on one cpu while puts
	137	* happen on another, adding a single cpu's count could cause
	138	* @ref->count to hit 0 before we've got a consistent value - but the
	139	* sum of all the counts will be consistent and correct.
	140	*
	141	* Subtracting the bias value then has to happen _after_ adding count to
	142	* &ref->count; we need the bias value to prevent &ref->count from
	143	* reaching 0 before we add the percpu counts. But doing it at the same
	144	* time is equivalent and saves us atomic operations:
	145	*/
	146
e625305b	147	atomic_long_add((long)count - PCPU_COUNT_BIAS, &ref->count);
215e262f	148
e625305b TH	149	WARN_ONCE(atomic_long_read(&ref->count) <= 0,
	150	"percpu ref (%pf) <= 0 (%ld) after killed",
	151	ref->release, atomic_long_read(&ref->count));
687b0ad2	152
dbece3a0 TH	153	/* @ref is viewed as dead on all CPUs, send out kill confirmation */
	154	if (ref->confirm_kill)
	155	ref->confirm_kill(ref);
	156
215e262f KO	157	/*
	158	* Now we're in single atomic_t mode with a consistent refcount, so it's
	159	* safe to drop our initial ref:
	160	*/
	161	percpu_ref_put(ref);
	162	}
	163
	164	/**
dbece3a0	165	* percpu_ref_kill_and_confirm - drop the initial ref and schedule confirmation
ac899061	166	* @ref: percpu_ref to kill
dbece3a0	167	* @confirm_kill: optional confirmation callback
215e262f	168	*
dbece3a0 TH	169	* Equivalent to percpu_ref_kill() but also schedules kill confirmation if
	170	* @confirm_kill is not NULL. @confirm_kill, which may not block, will be
	171	* called after @ref is seen as dead from all CPUs - all further
	172	* invocations of percpu_ref_tryget() will fail. See percpu_ref_tryget()
	173	* for more details.
215e262f	174	*
dbece3a0 TH	175	* Due to the way percpu_ref is implemented, @confirm_kill will be called
	176	* after at least one full RCU grace period has passed but this is an
	177	* implementation detail and callers must not depend on it.
215e262f	178	*/
dbece3a0 TH	179	void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
dbece3a0 TH	180	percpu_ref_func_t *confirm_kill)
215e262f	181	{
7d742075	182	WARN_ONCE(ref->pcpu_count_ptr & PCPU_REF_DEAD,
4843c332 TH	183	"percpu_ref_kill() called more than once on %pf!",
4843c332 TH	184	ref->release);
215e262f	185
7d742075	186	ref->pcpu_count_ptr \|= PCPU_REF_DEAD;
dbece3a0	187	ref->confirm_kill = confirm_kill;
215e262f	188
a4244454	189	call_rcu_sched(&ref->rcu, percpu_ref_kill_rcu);
215e262f	190	}
5e9dd373	191	EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm);