/*
 * Functions related to io context handling
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/bootmem.h>	/* for max_pfn/max_low_pfn */
#include <linux/slab.h>

#include "blk.h"

/*
 * For io context allocations
 */
static struct kmem_cache *iocontext_cachep;

/**
 * get_io_context - increment reference count to io_context
 * @ioc: io_context to get
 *
 * Increment reference count to @ioc.
 */
void get_io_context(struct io_context *ioc)
{
	BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
	atomic_long_inc(&ioc->refcount);
}
EXPORT_SYMBOL(get_io_context);
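
/*
 * Illustrative sketch, not part of the original file: a holder that needs
 * @ioc to stay alive across a section where nothing else pins it would
 * typically pair the reference helpers defined in this file:
 *
 *	get_io_context(ioc);
 *	... use ioc without holding ioc->lock or any queue_lock ...
 *	put_io_context(ioc, NULL);
 *
 * put_io_context() is defined below; %NULL is always a valid queue hint.
 */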

/*
 * Releasing ioc may nest into another put_io_context() leading to nested
 * fast path release.  As the ioc's can't be the same, this is okay but
 * makes lockdep whine.  Keep track of nesting and use it as subclass.
 */
#ifdef CONFIG_LOCKDEP
#define ioc_release_depth(q)		((q) ? (q)->ioc_release_depth : 0)
#define ioc_release_depth_inc(q)	(q)->ioc_release_depth++
#define ioc_release_depth_dec(q)	(q)->ioc_release_depth--
#else
#define ioc_release_depth(q)		0
#define ioc_release_depth_inc(q)	do { } while (0)
#define ioc_release_depth_dec(q)	do { } while (0)
#endif
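
/*
 * Sketch of how the depth is consumed (mirrors put_io_context() below,
 * shown here only as illustration): the computed depth is handed to
 * lockdep as the subclass when taking ioc->lock on the release path,
 *
 *	spin_lock_irqsave_nested(&ioc->lock, flags,
 *				 ioc_release_depth(locked_q));
 *
 * so nested releases of different ioc's don't look like recursive locking.
 */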

/*
 * Slow path for ioc release in put_io_context().  Performs double-lock
 * dancing to unlink all icq's and then frees ioc.
 */
static void ioc_release_fn(struct work_struct *work)
{
	struct io_context *ioc = container_of(work, struct io_context,
					      release_work);
	struct request_queue *last_q = NULL;

	spin_lock_irq(&ioc->lock);

	while (!hlist_empty(&ioc->icq_list)) {
		struct io_cq *icq = hlist_entry(ioc->icq_list.first,
						struct io_cq, ioc_node);
		struct request_queue *this_q = icq->q;

		if (this_q != last_q) {
			/*
			 * Need to switch to @this_q.  Once we release
			 * @ioc->lock, it can go away along with @icq.
			 * Hold on to it.
			 */
			__blk_get_queue(this_q);

			/*
			 * blk_put_queue() might sleep thanks to kobject
			 * idiocy.  Always release both locks, put and
			 * restart.
			 */
			if (last_q) {
				spin_unlock(last_q->queue_lock);
				spin_unlock_irq(&ioc->lock);
				blk_put_queue(last_q);
			} else {
				spin_unlock_irq(&ioc->lock);
			}

			last_q = this_q;
			spin_lock_irq(this_q->queue_lock);
			spin_lock(&ioc->lock);
			continue;
		}
		ioc_release_depth_inc(this_q);
		icq->exit(icq);
		icq->release(icq);
		ioc_release_depth_dec(this_q);
	}

	if (last_q) {
		spin_unlock(last_q->queue_lock);
		spin_unlock_irq(&ioc->lock);
		blk_put_queue(last_q);
	} else {
		spin_unlock_irq(&ioc->lock);
	}

	kmem_cache_free(iocontext_cachep, ioc);
}
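
/*
 * Deferral hand-off sketch (illustrative, mirrors put_io_context() below):
 * the fast path only uses trylock on queue_locks, so when that fails it
 * punts the remaining icq's to this worker with
 *
 *	schedule_work(&ioc->release_work);
 *
 * ioc_release_fn() then runs in process context where it may block and can
 * take each queue_lock before ioc->lock in the proper outer-to-inner order.
 */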

/**
 * put_io_context - put a reference of io_context
 * @ioc: io_context to put
 * @locked_q: request_queue the caller is holding queue_lock of (hint)
 *
 * Decrement reference count of @ioc and release it if the count reaches
 * zero.  If the caller is holding queue_lock of a queue, it can indicate
 * that with @locked_q.  This is an optimization hint and the caller is
 * allowed to pass in %NULL even when it's holding a queue_lock.
 */
void put_io_context(struct io_context *ioc, struct request_queue *locked_q)
{
	struct request_queue *last_q = locked_q;
	unsigned long flags;

	if (ioc == NULL)
		return;

	BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
	if (locked_q)
		lockdep_assert_held(locked_q->queue_lock);

	if (!atomic_long_dec_and_test(&ioc->refcount))
		return;

	/*
	 * Destroy @ioc.  This is a bit messy because icq's are chained
	 * from both ioc and queue, and ioc->lock nests inside queue_lock.
	 * The inner ioc->lock should be held to walk our icq_list and then
	 * for each icq the outer matching queue_lock should be grabbed.
	 * i.e. we need to do reverse-order double lock dancing.
	 *
	 * Another twist is that we are often called with one of the
	 * matching queue_locks held as indicated by @locked_q, which
	 * prevents performing double-lock dance for other queues.
	 *
	 * So, we do it in two stages.  The fast path uses the queue_lock
	 * the caller is holding and, if other queues need to be accessed,
	 * uses trylock to avoid introducing locking dependency.  This can
	 * handle most cases, especially if @ioc was performing IO on only
	 * a single device.
	 *
	 * If trylock doesn't cut it, we defer to @ioc->release_work which
	 * can do all the double-locking dancing.
	 */
	spin_lock_irqsave_nested(&ioc->lock, flags,
				 ioc_release_depth(locked_q));

	while (!hlist_empty(&ioc->icq_list)) {
		struct io_cq *icq = hlist_entry(ioc->icq_list.first,
						struct io_cq, ioc_node);
		struct request_queue *this_q = icq->q;

		if (this_q != last_q) {
			if (last_q && last_q != locked_q)
				spin_unlock(last_q->queue_lock);
			last_q = NULL;

			if (!spin_trylock(this_q->queue_lock))
				break;
			last_q = this_q;
			continue;
		}
		ioc_release_depth_inc(this_q);
		icq->exit(icq);
		icq->release(icq);
		ioc_release_depth_dec(this_q);
	}

	if (last_q && last_q != locked_q)
		spin_unlock(last_q->queue_lock);

	spin_unlock_irqrestore(&ioc->lock, flags);

	/* if no icq is left, we're done; otherwise, kick release_work */
	if (hlist_empty(&ioc->icq_list))
		kmem_cache_free(iocontext_cachep, ioc);
	else
		schedule_work(&ioc->release_work);
}
EXPORT_SYMBOL(put_io_context);
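
/*
 * Usage sketch, purely illustrative: a caller already holding a queue's
 * queue_lock passes that queue as the hint so the fast path can reuse the
 * lock, while a caller holding no queue_lock simply passes %NULL:
 *
 *	spin_lock_irq(q->queue_lock);
 *	...
 *	put_io_context(ioc, q);
 *	spin_unlock_irq(q->queue_lock);
 *
 * versus a plain put_io_context(ioc, NULL) from unlocked context.
 */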

/* Called by the exiting task */
void exit_io_context(struct task_struct *task)
{
	struct io_context *ioc;

	/* PF_EXITING prevents new io_context from being attached to @task */
	WARN_ON_ONCE(!(current->flags & PF_EXITING));

	task_lock(task);
	ioc = task->io_context;
	task->io_context = NULL;
	task_unlock(task);

	atomic_dec(&ioc->nr_tasks);
	put_io_context(ioc, NULL);
}

void create_io_context_slowpath(struct task_struct *task, gfp_t gfp_flags,
				int node)
{
	struct io_context *ioc;

	ioc = kmem_cache_alloc_node(iocontext_cachep, gfp_flags | __GFP_ZERO,
				    node);
	if (unlikely(!ioc))
		return;

	/* initialize */
	atomic_long_set(&ioc->refcount, 1);
	atomic_set(&ioc->nr_tasks, 1);
	spin_lock_init(&ioc->lock);
	INIT_RADIX_TREE(&ioc->icq_tree, GFP_ATOMIC | __GFP_HIGH);
	INIT_HLIST_HEAD(&ioc->icq_list);
	INIT_WORK(&ioc->release_work, ioc_release_fn);

	/* try to install, somebody might already have beaten us to it */
	task_lock(task);
	if (!task->io_context && !(task->flags & PF_EXITING))
		task->io_context = ioc;
	else
		kmem_cache_free(iocontext_cachep, ioc);
	task_unlock(task);
}
EXPORT_SYMBOL(create_io_context_slowpath);
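
/*
 * Expected fast-path wrapper (lives outside this file; reproduced here only
 * as an assumption-laden sketch): callers are meant to go through a
 * create_io_context()-style helper that drops into the slowpath only when
 * the task has no io_context yet, roughly
 *
 *	if (unlikely(!task->io_context))
 *		create_io_context_slowpath(task, gfp_flags, node);
 *	return task->io_context;
 *
 * which is also the helper get_task_io_context() below loops on.
 */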

/**
 * get_task_io_context - get io_context of a task
 * @task: task of interest
 * @gfp_flags: allocation flags, used if allocation is necessary
 * @node: allocation node, used if allocation is necessary
 *
 * Return io_context of @task.  If it doesn't exist, it is created with
 * @gfp_flags and @node.  The returned io_context has its reference count
 * incremented.
 *
 * This function always goes through task_lock() and it's better to use
 * %current->io_context + get_io_context() for %current.
 */
struct io_context *get_task_io_context(struct task_struct *task,
				       gfp_t gfp_flags, int node)
{
	struct io_context *ioc;

	might_sleep_if(gfp_flags & __GFP_WAIT);

	do {
		task_lock(task);
		ioc = task->io_context;
		if (likely(ioc)) {
			get_io_context(ioc);
			task_unlock(task);
			return ioc;
		}
		task_unlock(task);
	} while (create_io_context(task, gfp_flags, node));

	return NULL;
}
EXPORT_SYMBOL(get_task_io_context);
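
/*
 * Usage sketch (illustrative; @p is a hypothetical task pointer): taking a
 * counted reference on another task's io_context and dropping it later:
 *
 *	struct io_context *ioc;
 *
 *	ioc = get_task_io_context(p, GFP_KERNEL, NUMA_NO_NODE);
 *	if (ioc) {
 *		... inspect ioc->ioprio, ioc->nr_tasks, ...
 *		put_io_context(ioc, NULL);
 *	}
 */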

void ioc_set_changed(struct io_context *ioc, int which)
{
	struct io_cq *icq;
	struct hlist_node *n;

	hlist_for_each_entry(icq, n, &ioc->icq_list, ioc_node)
		set_bit(which, &icq->changed);
}

/**
 * ioc_ioprio_changed - notify ioprio change
 * @ioc: io_context of interest
 * @ioprio: new ioprio
 *
 * @ioc's ioprio has changed to @ioprio.  Set %ICQ_IOPRIO_CHANGED for all
 * icq's.  iosched is responsible for checking the bit and applying it on
 * request issue path.
 */
void ioc_ioprio_changed(struct io_context *ioc, int ioprio)
{
	unsigned long flags;

	spin_lock_irqsave(&ioc->lock, flags);
	ioc->ioprio = ioprio;
	ioc_set_changed(ioc, ICQ_IOPRIO_CHANGED);
	spin_unlock_irqrestore(&ioc->lock, flags);
}

/**
 * ioc_cgroup_changed - notify cgroup change
 * @ioc: io_context of interest
 *
 * @ioc's cgroup has changed.  Set %ICQ_CGROUP_CHANGED for all icq's.
 * iosched is responsible for checking the bit and applying it on request
 * issue path.
 */
void ioc_cgroup_changed(struct io_context *ioc)
{
	unsigned long flags;

	spin_lock_irqsave(&ioc->lock, flags);
	ioc_set_changed(ioc, ICQ_CGROUP_CHANGED);
	spin_unlock_irqrestore(&ioc->lock, flags);
}
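
/*
 * Consumption sketch (illustrative; the actual hooks live in the io
 * scheduler, not in this file): on its request issue path an iosched is
 * expected to test-and-clear the notification bits set above, roughly
 *
 *	if (test_and_clear_bit(ICQ_IOPRIO_CHANGED, &icq->changed))
 *		... recompute the per-queue ioprio class/value ...
 *	if (test_and_clear_bit(ICQ_CGROUP_CHANGED, &icq->changed))
 *		... drop the cached cgroup association ...
 */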

static int __init blk_ioc_init(void)
{
	iocontext_cachep = kmem_cache_create("blkdev_ioc",
			sizeof(struct io_context), 0, SLAB_PANIC, NULL);
	return 0;
}
subsys_initcall(blk_ioc_init);