/*
 * Generic pidhash and scalable, time-bounded PID allocator
 *
 * (C) 2002-2003 William Irwin, IBM
 * (C) 2004 William Irwin, Oracle
 * (C) 2002-2004 Ingo Molnar, Red Hat
 *
 * pid-structures are backing objects for tasks sharing a given ID to chain
 * against. There is very little to them aside from hashing them and
 * parking tasks using given ID's on a list.
 *
 * The hash is always changed with the tasklist_lock write-acquired,
 * and the hash is only accessed with the tasklist_lock at least
 * read-acquired, so there's no additional SMP locking needed here.
 *
 * We have a list of bitmap pages, which bitmaps represent the PID space.
 * Allocating and freeing PIDs is completely lockless. The worst-case
 * allocation scenario when all but one out of 1 million PIDs possible are
 * allocated already: the scanning of 32 list entries and at most PAGE_SIZE
 * bytes. The typical fastpath is a single successful setbit. Freeing is O(1).
 */

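/*
 * Sizing sketch (assuming the common 4 KiB PAGE_SIZE, i.e. 32768 bits
 * per bitmap page): a 1-million-PID space is covered by
 * 1048576 / 32768 = 32 bitmap pages, which is the "32 list entries"
 * worst case above, and the 4-million-PID maximum fits in 128 lazily
 * allocated pages.
 */
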
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/hash.h>
#include <linux/pid_namespace.h>
#include <linux/init_task.h>

#define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift)
static struct hlist_head *pid_hash;
static int pidhash_shift;
struct pid init_struct_pid = INIT_STRUCT_PID;

int pid_max = PID_MAX_DEFAULT;

#define RESERVED_PIDS 300

int pid_max_min = RESERVED_PIDS + 1;
int pid_max_max = PID_MAX_LIMIT;

#define BITS_PER_PAGE (PAGE_SIZE*8)
#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1)

static inline int mk_pid(struct pid_namespace *pid_ns,
                struct pidmap *map, int off)
{
        return (map - pid_ns->pidmap)*BITS_PER_PAGE + off;
}

#define find_next_offset(map, off) \
                find_next_zero_bit((map)->page, BITS_PER_PAGE, off)

/*
 * PID-map pages start out as NULL, they get allocated upon
 * first use and are never deallocated. This way a low pid_max
 * value does not cause lots of bitmaps to be allocated, but
 * the scheme scales to up to 4 million PIDs, runtime.
 */
struct pid_namespace init_pid_ns = {
        .kref = {
                .refcount = ATOMIC_INIT(2),
        },
        .pidmap = {
                [ 0 ... PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL }
        },
        .last_pid = 0,
        .child_reaper = &init_task
};

/*
 * Note: disable interrupts while the pidmap_lock is held as an
 * interrupt might come in and do read_lock(&tasklist_lock).
 *
 * If we don't disable interrupts there is a nasty deadlock between
 * detach_pid()->free_pid() and another cpu that does
 * spin_lock(&pidmap_lock) followed by an interrupt routine that does
 * read_lock(&tasklist_lock);
 *
 * After we clean up the tasklist_lock and know there are no
 * irq handlers that take it we can leave the interrupts enabled.
 * For now it is easier to be safe than to prove it can't happen.
 */

static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);

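/*
 * Give a PID number back to the namespace's bitmap: clear its bit and
 * bump the free count of the page it lives in. Lockless and O(1).
 */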
static fastcall void free_pidmap(struct pid_namespace *pid_ns, int pid)
{
        struct pidmap *map = pid_ns->pidmap + pid / BITS_PER_PAGE;
        int offset = pid & BITS_PER_PAGE_MASK;

        clear_bit(offset, map->page);
        atomic_inc(&map->nr_free);
}

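/*
 * Allocate the next free PID number, scanning the bitmap pages starting
 * just after last_pid and wrapping around to RESERVED_PIDS at pid_max.
 * Bitmap pages are allocated lazily on first use and a PID is claimed
 * with an atomic test_and_set_bit(); returns -1 when the space is
 * exhausted.
 */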
static int alloc_pidmap(struct pid_namespace *pid_ns)
{
        int i, offset, max_scan, pid, last = pid_ns->last_pid;
        struct pidmap *map;

        pid = last + 1;
        if (pid >= pid_max)
                pid = RESERVED_PIDS;
        offset = pid & BITS_PER_PAGE_MASK;
        map = &pid_ns->pidmap[pid/BITS_PER_PAGE];
        max_scan = (pid_max + BITS_PER_PAGE - 1)/BITS_PER_PAGE - !offset;
        for (i = 0; i <= max_scan; ++i) {
                if (unlikely(!map->page)) {
                        void *page = kzalloc(PAGE_SIZE, GFP_KERNEL);
                        /*
                         * Free the page if someone raced with us
                         * installing it:
                         */
                        spin_lock_irq(&pidmap_lock);
                        if (map->page)
                                kfree(page);
                        else
                                map->page = page;
                        spin_unlock_irq(&pidmap_lock);
                        if (unlikely(!map->page))
                                break;
                }
                if (likely(atomic_read(&map->nr_free))) {
                        do {
                                if (!test_and_set_bit(offset, map->page)) {
                                        atomic_dec(&map->nr_free);
                                        pid_ns->last_pid = pid;
                                        return pid;
                                }
                                offset = find_next_offset(map, offset);
                                pid = mk_pid(pid_ns, map, offset);
                        /*
                         * find_next_offset() found a bit, the pid from it
                         * is in-bounds, and if we fell back to the last
                         * bitmap block and the final block was the same
                         * as the starting point, pid is before last_pid.
                         */
                        } while (offset < BITS_PER_PAGE && pid < pid_max &&
                                        (i != max_scan || pid < last ||
                                            !((last+1) & BITS_PER_PAGE_MASK)));
                }
                if (map < &pid_ns->pidmap[(pid_max-1)/BITS_PER_PAGE]) {
                        ++map;
                        offset = 0;
                } else {
                        map = &pid_ns->pidmap[0];
                        offset = RESERVED_PIDS;
                        if (unlikely(last == offset))
                                break;
                }
                pid = mk_pid(pid_ns, map, offset);
        }
        return -1;
}

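/*
 * Find the first allocated PID number greater than 'last', or -1 if
 * there is none. Used by find_ge_pid() below to walk the PID space.
 */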
static int next_pidmap(struct pid_namespace *pid_ns, int last)
{
        int offset;
        struct pidmap *map, *end;

        offset = (last + 1) & BITS_PER_PAGE_MASK;
        map = &pid_ns->pidmap[(last + 1)/BITS_PER_PAGE];
        end = &pid_ns->pidmap[PIDMAP_ENTRIES];
        for (; map < end; map++, offset = 0) {
                if (unlikely(!map->page))
                        continue;
                offset = find_next_bit((map)->page, BITS_PER_PAGE, offset);
                if (offset < BITS_PER_PAGE)
                        return mk_pid(pid_ns, map, offset);
        }
        return -1;
}

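/*
 * Drop a reference on a struct pid and return it to its cache once the
 * last reference is gone. Tolerates a NULL pid.
 */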
fastcall void put_pid(struct pid *pid)
{
        struct pid_namespace *ns;

        if (!pid)
                return;

        /* FIXME - this must be the namespace this pid lives in */
        ns = &init_pid_ns;
        if ((atomic_read(&pid->count) == 1) ||
             atomic_dec_and_test(&pid->count))
                kmem_cache_free(ns->pid_cachep, pid);
}
EXPORT_SYMBOL_GPL(put_pid);

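/* RCU callback used by free_pid() to defer the final put_pid(). */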
static void delayed_put_pid(struct rcu_head *rhp)
{
        struct pid *pid = container_of(rhp, struct pid, rcu);
        put_pid(pid);
}

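/*
 * Unhash the pid and give its number back to the bitmap. The final
 * put_pid() is deferred through RCU so that concurrent lockless
 * find_pid() lookups can still use the structure safely.
 */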
fastcall void free_pid(struct pid *pid)
{
        /* We can be called with write_lock_irq(&tasklist_lock) held */
        unsigned long flags;

        spin_lock_irqsave(&pidmap_lock, flags);
        hlist_del_rcu(&pid->pid_chain);
        spin_unlock_irqrestore(&pidmap_lock, flags);

        free_pidmap(&init_pid_ns, pid->nr);
        call_rcu(&pid->rcu, delayed_put_pid);
}

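/*
 * Allocate a struct pid from the current namespace's cache, assign it a
 * number from the pidmap and add it to the pid hash. Returns NULL if
 * either the cache or the pidmap allocation fails.
 */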
struct pid *alloc_pid(void)
{
        struct pid *pid;
        enum pid_type type;
        int nr = -1;
        struct pid_namespace *ns;

        ns = task_active_pid_ns(current);
        pid = kmem_cache_alloc(ns->pid_cachep, GFP_KERNEL);
        if (!pid)
                goto out;

        nr = alloc_pidmap(ns);
        if (nr < 0)
                goto out_free;

        atomic_set(&pid->count, 1);
        pid->nr = nr;
        for (type = 0; type < PIDTYPE_MAX; ++type)
                INIT_HLIST_HEAD(&pid->tasks[type]);

        spin_lock_irq(&pidmap_lock);
        hlist_add_head_rcu(&pid->pid_chain, &pid_hash[pid_hashfn(pid->nr)]);
        spin_unlock_irq(&pidmap_lock);

out:
        return pid;

out_free:
        kmem_cache_free(ns->pid_cachep, pid);
        pid = NULL;
        goto out;
}

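/*
 * Look up a struct pid by number. The hash chain is walked with RCU,
 * so callers must hold rcu_read_lock() or the tasklist_lock.
 */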
struct pid * fastcall find_pid(int nr)
{
        struct hlist_node *elem;
        struct pid *pid;

        hlist_for_each_entry_rcu(pid, elem,
                        &pid_hash[pid_hashfn(nr)], pid_chain) {
                if (pid->nr == nr)
                        return pid;
        }
        return NULL;
}
EXPORT_SYMBOL_GPL(find_pid);

/*
 * attach_pid() must be called with the tasklist_lock write-held.
 */
int fastcall attach_pid(struct task_struct *task, enum pid_type type,
                struct pid *pid)
{
        struct pid_link *link;

        link = &task->pids[type];
        link->pid = pid;
        hlist_add_head_rcu(&link->node, &pid->tasks[type]);

        return 0;
}

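/*
 * Counterpart to attach_pid(): unlink the task from the pid for the
 * given type and free the pid once no tasks of any type hang off it
 * any more. Called under the same tasklist_lock write protection as
 * attach_pid().
 */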
void fastcall detach_pid(struct task_struct *task, enum pid_type type)
{
        struct pid_link *link;
        struct pid *pid;
        int tmp;

        link = &task->pids[type];
        pid = link->pid;

        hlist_del_rcu(&link->node);
        link->pid = NULL;

        for (tmp = PIDTYPE_MAX; --tmp >= 0; )
                if (!hlist_empty(&pid->tasks[tmp]))
                        return;

        free_pid(pid);
}

/* transfer_pid is an optimization of attach_pid(new), detach_pid(old) */
void fastcall transfer_pid(struct task_struct *old, struct task_struct *new,
                           enum pid_type type)
{
        new->pids[type].pid = old->pids[type].pid;
        hlist_replace_rcu(&old->pids[type].node, &new->pids[type].node);
        old->pids[type].pid = NULL;
}

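/*
 * Return the first task attached to the pid for the given type, or
 * NULL. Uses rcu_dereference(), so the result is only stable under
 * rcu_read_lock() or the tasklist_lock.
 */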
struct task_struct * fastcall pid_task(struct pid *pid, enum pid_type type)
{
        struct task_struct *result = NULL;
        if (pid) {
                struct hlist_node *first;
                first = rcu_dereference(pid->tasks[type].first);
                if (first)
                        result = hlist_entry(first, struct task_struct, pids[(type)].node);
        }
        return result;
}

/*
 * Must be called under rcu_read_lock() or with tasklist_lock read-held.
 */
struct task_struct *find_task_by_pid_type(int type, int nr)
{
        return pid_task(find_pid(nr), type);
}

EXPORT_SYMBOL(find_task_by_pid_type);

struct pid *get_task_pid(struct task_struct *task, enum pid_type type)
{
        struct pid *pid;
        rcu_read_lock();
        pid = get_pid(task->pids[type].pid);
        rcu_read_unlock();
        return pid;
}

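/*
 * Like pid_task(), but takes a reference on the resulting task so it
 * can be used after the RCU read-side critical section ends.
 */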
struct task_struct *fastcall get_pid_task(struct pid *pid, enum pid_type type)
{
        struct task_struct *result;
        rcu_read_lock();
        result = pid_task(pid, type);
        if (result)
                get_task_struct(result);
        rcu_read_unlock();
        return result;
}

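/* Look up a pid by number and take a reference on it. */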
struct pid *find_get_pid(pid_t nr)
{
        struct pid *pid;

        rcu_read_lock();
        pid = get_pid(find_pid(nr));
        rcu_read_unlock();

        return pid;
}

/*
 * Used by proc to find the first pid that is greater than or equal to nr.
 *
 * If there is a pid at nr this function is exactly the same as find_pid.
 */
struct pid *find_ge_pid(int nr)
{
        struct pid *pid;

        do {
                pid = find_pid(nr);
                if (pid)
                        break;
                nr = next_pidmap(task_active_pid_ns(current), nr);
        } while (nr > 0);

        return pid;
}
EXPORT_SYMBOL_GPL(find_get_pid);

struct pid_cache {
        int nr_ids;
        char name[16];
        struct kmem_cache *cachep;
        struct list_head list;
};

static LIST_HEAD(pid_caches_lh);
static DEFINE_MUTEX(pid_caches_mutex);

/*
 * creates the kmem cache to allocate pids from.
 * @nr_ids: the number of numerical ids this pid will have to carry
 */
static struct kmem_cache *create_pid_cachep(int nr_ids)
{
        struct pid_cache *pcache;
        struct kmem_cache *cachep;

        mutex_lock(&pid_caches_mutex);
        list_for_each_entry(pcache, &pid_caches_lh, list)
                if (pcache->nr_ids == nr_ids)
                        goto out;

        pcache = kmalloc(sizeof(struct pid_cache), GFP_KERNEL);
        if (pcache == NULL)
                goto err_alloc;

        snprintf(pcache->name, sizeof(pcache->name), "pid_%d", nr_ids);
        cachep = kmem_cache_create(pcache->name,
                        /* FIXME add numerical ids here */
                        sizeof(struct pid), 0, SLAB_HWCACHE_ALIGN, NULL);
        if (cachep == NULL)
                goto err_cachep;

        pcache->nr_ids = nr_ids;
        pcache->cachep = cachep;
        list_add(&pcache->list, &pid_caches_lh);
out:
        mutex_unlock(&pid_caches_mutex);
        return pcache->cachep;

err_cachep:
        kfree(pcache);
err_alloc:
        mutex_unlock(&pid_caches_mutex);
        return NULL;
}

struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns)
{
        BUG_ON(!old_ns);
        get_pid_ns(old_ns);
        return old_ns;
}

void free_pid_ns(struct kref *kref)
{
        struct pid_namespace *ns;

        ns = container_of(kref, struct pid_namespace, kref);
        kfree(ns);
}

/*
 * The pid hash table is scaled according to the amount of memory in the
 * machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or
 * more.
 */
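/*
 * Worked example of the calculation below (the figures depend on
 * nr_kernel_pages): with roughly 256 MB of kernel pages, megabytes is
 * 256 and fls(256 * 4) is 11, giving 1 << 11 = 2048 hash slots; from
 * 1 GB upwards the shift is clamped to 12, i.e. the 4096-slot maximum.
 */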
void __init pidhash_init(void)
{
        int i, pidhash_size;
        unsigned long megabytes = nr_kernel_pages >> (20 - PAGE_SHIFT);

        pidhash_shift = max(4, fls(megabytes * 4));
        pidhash_shift = min(12, pidhash_shift);
        pidhash_size = 1 << pidhash_shift;

        printk("PID hash table entries: %d (order: %d, %Zd bytes)\n",
                pidhash_size, pidhash_shift,
                pidhash_size * sizeof(struct hlist_head));

        pid_hash = alloc_bootmem(pidhash_size * sizeof(*(pid_hash)));
        if (!pid_hash)
                panic("Could not alloc pidhash!\n");
        for (i = 0; i < pidhash_size; i++)
                INIT_HLIST_HEAD(&pid_hash[i]);
}

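/*
 * Set up the initial namespace's first bitmap page, reserve PID 0 and
 * create the kmem cache that struct pids are allocated from.
 */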
void __init pidmap_init(void)
{
        init_pid_ns.pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
        /* Reserve PID 0. We never call free_pidmap(0) */
        set_bit(0, init_pid_ns.pidmap[0].page);
        atomic_dec(&init_pid_ns.pidmap[0].nr_free);

        init_pid_ns.pid_cachep = create_pid_cachep(1);
        if (init_pid_ns.pid_cachep == NULL)
                panic("Can't create pid_1 cachep\n");
}