]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * include/linux/backing-dev.h | |
3 | * | |
4 | * low-level device information and state which is propagated up through | |
5 | * to high-level code. | |
6 | */ | |
7 | ||
8 | #ifndef _LINUX_BACKING_DEV_H | |
9 | #define _LINUX_BACKING_DEV_H | |
10 | ||
11 | #include <linux/kernel.h> | |
12 | #include <linux/fs.h> | |
13 | #include <linux/sched.h> | |
14 | #include <linux/blkdev.h> | |
15 | #include <linux/writeback.h> | |
16 | #include <linux/blk-cgroup.h> | |
17 | #include <linux/backing-dev-defs.h> | |
18 | #include <linux/slab.h> | |
19 | ||
20 | int __must_check bdi_init(struct backing_dev_info *bdi); | |
21 | ||
22 | static inline struct backing_dev_info *bdi_get(struct backing_dev_info *bdi) | |
23 | { | |
24 | kref_get(&bdi->refcnt); | |
25 | return bdi; | |
26 | } | |
27 | ||
/*
 * Defined outside this header.
 * NOTE(review): presumably drops a reference obtained with bdi_get() --
 * confirm against the definition.
 */
void
bdi_put(struct backing_dev_info *bdi);
29 | ||
30 | __printf(3, 4) | |
31 | int bdi_register(struct backing_dev_info *bdi, struct device *parent, | |
32 | const char *fmt, ...); | |
33 | int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev); | |
34 | int bdi_register_owner(struct backing_dev_info *bdi, struct device *owner); | |
35 | void bdi_unregister(struct backing_dev_info *bdi); | |
36 | ||
37 | int __must_check bdi_setup_and_register(struct backing_dev_info *, char *); | |
38 | void bdi_destroy(struct backing_dev_info *bdi); | |
39 | struct backing_dev_info *bdi_alloc_node(gfp_t gfp_mask, int node_id); | |
40 | ||
41 | void wb_start_writeback(struct bdi_writeback *wb, long nr_pages, | |
42 | bool range_cyclic, enum wb_reason reason); | |
43 | void wb_start_background_writeback(struct bdi_writeback *wb); | |
44 | void wb_workfn(struct work_struct *work); | |
45 | void wb_wakeup_delayed(struct bdi_writeback *wb); | |
46 | ||
47 | extern spinlock_t bdi_lock; | |
48 | extern struct list_head bdi_list; | |
49 | ||
50 | extern struct workqueue_struct *bdi_wq; | |
51 | ||
52 | static inline bool wb_has_dirty_io(struct bdi_writeback *wb) | |
53 | { | |
54 | return test_bit(WB_has_dirty_io, &wb->state); | |
55 | } | |
56 | ||
57 | static inline bool bdi_has_dirty_io(struct backing_dev_info *bdi) | |
58 | { | |
59 | /* | |
60 | * @bdi->tot_write_bandwidth is guaranteed to be > 0 if there are | |
61 | * any dirty wbs. See wb_update_write_bandwidth(). | |
62 | */ | |
63 | return atomic_long_read(&bdi->tot_write_bandwidth); | |
64 | } | |
65 | ||
66 | static inline void __add_wb_stat(struct bdi_writeback *wb, | |
67 | enum wb_stat_item item, s64 amount) | |
68 | { | |
69 | __percpu_counter_add(&wb->stat[item], amount, WB_STAT_BATCH); | |
70 | } | |
71 | ||
72 | static inline void __inc_wb_stat(struct bdi_writeback *wb, | |
73 | enum wb_stat_item item) | |
74 | { | |
75 | __add_wb_stat(wb, item, 1); | |
76 | } | |
77 | ||
78 | static inline void inc_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item) | |
79 | { | |
80 | unsigned long flags; | |
81 | ||
82 | local_irq_save(flags); | |
83 | __inc_wb_stat(wb, item); | |
84 | local_irq_restore(flags); | |
85 | } | |
86 | ||
87 | static inline void __dec_wb_stat(struct bdi_writeback *wb, | |
88 | enum wb_stat_item item) | |
89 | { | |
90 | __add_wb_stat(wb, item, -1); | |
91 | } | |
92 | ||
93 | static inline void dec_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item) | |
94 | { | |
95 | unsigned long flags; | |
96 | ||
97 | local_irq_save(flags); | |
98 | __dec_wb_stat(wb, item); | |
99 | local_irq_restore(flags); | |
100 | } | |
101 | ||
102 | static inline s64 wb_stat(struct bdi_writeback *wb, enum wb_stat_item item) | |
103 | { | |
104 | return percpu_counter_read_positive(&wb->stat[item]); | |
105 | } | |
106 | ||
107 | static inline s64 __wb_stat_sum(struct bdi_writeback *wb, | |
108 | enum wb_stat_item item) | |
109 | { | |
110 | return percpu_counter_sum_positive(&wb->stat[item]); | |
111 | } | |
112 | ||
113 | static inline s64 wb_stat_sum(struct bdi_writeback *wb, enum wb_stat_item item) | |
114 | { | |
115 | s64 sum; | |
116 | unsigned long flags; | |
117 | ||
118 | local_irq_save(flags); | |
119 | sum = __wb_stat_sum(wb, item); | |
120 | local_irq_restore(flags); | |
121 | ||
122 | return sum; | |
123 | } | |
124 | ||
/*
 * Defined outside this header.
 * NOTE(review): presumably accounts one completed writeout against @wb --
 * confirm against the definition.
 */
void wb_writeout_inc(struct bdi_writeback *wb);
126 | ||
/*
 * Maximal error of a stat counter.
 *
 * On SMP builds this is nr_cpu_ids * WB_STAT_BATCH; otherwise it is 1.
 * @wb is not consulted.
 */
static inline unsigned long wb_stat_error(struct bdi_writeback *wb)
{
	unsigned long max_err = 1;

#ifdef CONFIG_SMP
	max_err = nr_cpu_ids * WB_STAT_BATCH;
#endif
	return max_err;
}
138 | ||
/*
 * Set the minimum / maximum ratio of @bdi.  Defined outside this header.
 * NOTE(review): units and valid range of the ratio, and the meaning of the
 * int result, are not visible here -- confirm against the definitions.
 */
int bdi_set_min_ratio(struct backing_dev_info *bdi,
		      unsigned int min_ratio);
int bdi_set_max_ratio(struct backing_dev_info *bdi,
		      unsigned int max_ratio);
141 | ||
/*
 * Flags in backing_dev_info::capabilities
 *
 * The first three flags control whether dirty pages will contribute to the
 * VM's accounting and whether writepages() should be called for dirty pages
 * (something that would not, for example, be appropriate for ramfs).
 *
 * WARNING: these three flags are closely related and should not normally be
 * used separately.  BDI_CAP_NO_ACCT_AND_WRITEBACK combines them into a
 * single convenience macro.
 *
 * BDI_CAP_NO_ACCT_DIRTY:    Dirty pages shouldn't contribute to accounting
 * BDI_CAP_NO_WRITEBACK:     Don't write pages back
 * BDI_CAP_NO_ACCT_WB:       Don't automatically account writeback pages
 * BDI_CAP_STABLE_WRITES:    Stable pages are required; this is the bit
 *                           tested by bdi_cap_stable_pages_required()
 * BDI_CAP_STRICTLIMIT:      Keep number of dirty pages below bdi threshold.
 *
 * BDI_CAP_CGROUP_WRITEBACK: Supports cgroup-aware writeback.
 */
#define BDI_CAP_NO_ACCT_DIRTY		0x00000001
#define BDI_CAP_NO_WRITEBACK		0x00000002
#define BDI_CAP_NO_ACCT_WB		0x00000004
#define BDI_CAP_STABLE_WRITES		0x00000008
#define BDI_CAP_STRICTLIMIT		0x00000010
#define BDI_CAP_CGROUP_WRITEBACK	0x00000020

#define BDI_CAP_NO_ACCT_AND_WRITEBACK \
	(BDI_CAP_NO_WRITEBACK | BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_ACCT_WB)
169 | ||
/*
 * Placeholder bdi defined outside this header; inode_to_bdi() returns its
 * address when given a NULL inode.
 */
extern struct backing_dev_info noop_backing_dev_info;
171 | ||
172 | /** | |
173 | * writeback_in_progress - determine whether there is writeback in progress | |
174 | * @wb: bdi_writeback of interest | |
175 | * | |
176 | * Determine whether there is writeback waiting to be handled against a | |
177 | * bdi_writeback. | |
178 | */ | |
179 | static inline bool writeback_in_progress(struct bdi_writeback *wb) | |
180 | { | |
181 | return test_bit(WB_writeback_running, &wb->state); | |
182 | } | |
183 | ||
184 | static inline struct backing_dev_info *inode_to_bdi(struct inode *inode) | |
185 | { | |
186 | struct super_block *sb; | |
187 | ||
188 | if (!inode) | |
189 | return &noop_backing_dev_info; | |
190 | ||
191 | sb = inode->i_sb; | |
192 | #ifdef CONFIG_BLOCK | |
193 | if (sb_is_blkdev_sb(sb)) | |
194 | return I_BDEV(inode)->bd_bdi; | |
195 | #endif | |
196 | return sb->s_bdi; | |
197 | } | |
198 | ||
199 | static inline int wb_congested(struct bdi_writeback *wb, int cong_bits) | |
200 | { | |
201 | struct backing_dev_info *bdi = wb->bdi; | |
202 | ||
203 | if (bdi->congested_fn) | |
204 | return bdi->congested_fn(bdi->congested_data, cong_bits); | |
205 | return wb->congested->state & cong_bits; | |
206 | } | |
207 | ||
208 | long congestion_wait(int sync, long timeout); | |
209 | long wait_iff_congested(struct pglist_data *pgdat, int sync, long timeout); | |
210 | int pdflush_proc_obsolete(struct ctl_table *table, int write, | |
211 | void __user *buffer, size_t *lenp, loff_t *ppos); | |
212 | ||
213 | static inline bool bdi_cap_stable_pages_required(struct backing_dev_info *bdi) | |
214 | { | |
215 | return bdi->capabilities & BDI_CAP_STABLE_WRITES; | |
216 | } | |
217 | ||
218 | static inline bool bdi_cap_writeback_dirty(struct backing_dev_info *bdi) | |
219 | { | |
220 | return !(bdi->capabilities & BDI_CAP_NO_WRITEBACK); | |
221 | } | |
222 | ||
223 | static inline bool bdi_cap_account_dirty(struct backing_dev_info *bdi) | |
224 | { | |
225 | return !(bdi->capabilities & BDI_CAP_NO_ACCT_DIRTY); | |
226 | } | |
227 | ||
228 | static inline bool bdi_cap_account_writeback(struct backing_dev_info *bdi) | |
229 | { | |
230 | /* Paranoia: BDI_CAP_NO_WRITEBACK implies BDI_CAP_NO_ACCT_WB */ | |
231 | return !(bdi->capabilities & (BDI_CAP_NO_ACCT_WB | | |
232 | BDI_CAP_NO_WRITEBACK)); | |
233 | } | |
234 | ||
235 | static inline bool mapping_cap_writeback_dirty(struct address_space *mapping) | |
236 | { | |
237 | return bdi_cap_writeback_dirty(inode_to_bdi(mapping->host)); | |
238 | } | |
239 | ||
240 | static inline bool mapping_cap_account_dirty(struct address_space *mapping) | |
241 | { | |
242 | return bdi_cap_account_dirty(inode_to_bdi(mapping->host)); | |
243 | } | |
244 | ||
/*
 * Wait action that simply calls schedule() and reports success (0).
 * @word is accepted to match the expected callback shape but is not used.
 */
static inline int bdi_sched_wait(void *word)
{
	schedule();

	return 0;
}
250 | ||
251 | #ifdef CONFIG_CGROUP_WRITEBACK | |
252 | ||
253 | struct bdi_writeback_congested * | |
254 | wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp); | |
255 | void wb_congested_put(struct bdi_writeback_congested *congested); | |
256 | struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi, | |
257 | struct cgroup_subsys_state *memcg_css, | |
258 | gfp_t gfp); | |
259 | void wb_memcg_offline(struct mem_cgroup *memcg); | |
260 | void wb_blkcg_offline(struct blkcg *blkcg); | |
261 | int inode_congested(struct inode *inode, int cong_bits); | |
262 | ||
263 | /** | |
264 | * inode_cgwb_enabled - test whether cgroup writeback is enabled on an inode | |
265 | * @inode: inode of interest | |
266 | * | |
267 | * cgroup writeback requires support from both the bdi and filesystem. | |
268 | * Also, both memcg and iocg have to be on the default hierarchy. Test | |
269 | * whether all conditions are met. | |
270 | * | |
271 | * Note that the test result may change dynamically on the same inode | |
272 | * depending on how memcg and iocg are configured. | |
273 | */ | |
274 | static inline bool inode_cgwb_enabled(struct inode *inode) | |
275 | { | |
276 | struct backing_dev_info *bdi = inode_to_bdi(inode); | |
277 | ||
278 | return cgroup_subsys_on_dfl(memory_cgrp_subsys) && | |
279 | cgroup_subsys_on_dfl(io_cgrp_subsys) && | |
280 | bdi_cap_account_dirty(bdi) && | |
281 | (bdi->capabilities & BDI_CAP_CGROUP_WRITEBACK) && | |
282 | (inode->i_sb->s_iflags & SB_I_CGROUPWB); | |
283 | } | |
284 | ||
285 | /** | |
286 | * wb_find_current - find wb for %current on a bdi | |
287 | * @bdi: bdi of interest | |
288 | * | |
289 | * Find the wb of @bdi which matches both the memcg and blkcg of %current. | |
290 | * Must be called under rcu_read_lock() which protects the returend wb. | |
291 | * NULL if not found. | |
292 | */ | |
293 | static inline struct bdi_writeback *wb_find_current(struct backing_dev_info *bdi) | |
294 | { | |
295 | struct cgroup_subsys_state *memcg_css; | |
296 | struct bdi_writeback *wb; | |
297 | ||
298 | memcg_css = task_css(current, memory_cgrp_id); | |
299 | if (!memcg_css->parent) | |
300 | return &bdi->wb; | |
301 | ||
302 | wb = radix_tree_lookup(&bdi->cgwb_tree, memcg_css->id); | |
303 | ||
304 | /* | |
305 | * %current's blkcg equals the effective blkcg of its memcg. No | |
306 | * need to use the relatively expensive cgroup_get_e_css(). | |
307 | */ | |
308 | if (likely(wb && wb->blkcg_css == task_css(current, io_cgrp_id))) | |
309 | return wb; | |
310 | return NULL; | |
311 | } | |
312 | ||
313 | /** | |
314 | * wb_get_create_current - get or create wb for %current on a bdi | |
315 | * @bdi: bdi of interest | |
316 | * @gfp: allocation mask | |
317 | * | |
318 | * Equivalent to wb_get_create() on %current's memcg. This function is | |
319 | * called from a relatively hot path and optimizes the common cases using | |
320 | * wb_find_current(). | |
321 | */ | |
322 | static inline struct bdi_writeback * | |
323 | wb_get_create_current(struct backing_dev_info *bdi, gfp_t gfp) | |
324 | { | |
325 | struct bdi_writeback *wb; | |
326 | ||
327 | rcu_read_lock(); | |
328 | wb = wb_find_current(bdi); | |
329 | if (wb && unlikely(!wb_tryget(wb))) | |
330 | wb = NULL; | |
331 | rcu_read_unlock(); | |
332 | ||
333 | if (unlikely(!wb)) { | |
334 | struct cgroup_subsys_state *memcg_css; | |
335 | ||
336 | memcg_css = task_get_css(current, memory_cgrp_id); | |
337 | wb = wb_get_create(bdi, memcg_css, gfp); | |
338 | css_put(memcg_css); | |
339 | } | |
340 | return wb; | |
341 | } | |
342 | ||
343 | /** | |
344 | * inode_to_wb_is_valid - test whether an inode has a wb associated | |
345 | * @inode: inode of interest | |
346 | * | |
347 | * Returns %true if @inode has a wb associated. May be called without any | |
348 | * locking. | |
349 | */ | |
350 | static inline bool inode_to_wb_is_valid(struct inode *inode) | |
351 | { | |
352 | return inode->i_wb; | |
353 | } | |
354 | ||
355 | /** | |
356 | * inode_to_wb - determine the wb of an inode | |
357 | * @inode: inode of interest | |
358 | * | |
359 | * Returns the wb @inode is currently associated with. The caller must be | |
360 | * holding either @inode->i_lock, @inode->i_mapping->tree_lock, or the | |
361 | * associated wb's list_lock. | |
362 | */ | |
363 | static inline struct bdi_writeback *inode_to_wb(struct inode *inode) | |
364 | { | |
365 | #ifdef CONFIG_LOCKDEP | |
366 | WARN_ON_ONCE(debug_locks && | |
367 | (!lockdep_is_held(&inode->i_lock) && | |
368 | !lockdep_is_held(&inode->i_mapping->tree_lock) && | |
369 | !lockdep_is_held(&inode->i_wb->list_lock))); | |
370 | #endif | |
371 | return inode->i_wb; | |
372 | } | |
373 | ||
374 | /** | |
375 | * unlocked_inode_to_wb_begin - begin unlocked inode wb access transaction | |
376 | * @inode: target inode | |
377 | * @lockedp: temp bool output param, to be passed to the end function | |
378 | * | |
379 | * The caller wants to access the wb associated with @inode but isn't | |
380 | * holding inode->i_lock, mapping->tree_lock or wb->list_lock. This | |
381 | * function determines the wb associated with @inode and ensures that the | |
382 | * association doesn't change until the transaction is finished with | |
383 | * unlocked_inode_to_wb_end(). | |
384 | * | |
385 | * The caller must call unlocked_inode_to_wb_end() with *@lockdep | |
386 | * afterwards and can't sleep during transaction. IRQ may or may not be | |
387 | * disabled on return. | |
388 | */ | |
389 | static inline struct bdi_writeback * | |
390 | unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp) | |
391 | { | |
392 | rcu_read_lock(); | |
393 | ||
394 | /* | |
395 | * Paired with store_release in inode_switch_wb_work_fn() and | |
396 | * ensures that we see the new wb if we see cleared I_WB_SWITCH. | |
397 | */ | |
398 | *lockedp = smp_load_acquire(&inode->i_state) & I_WB_SWITCH; | |
399 | ||
400 | if (unlikely(*lockedp)) | |
401 | spin_lock_irq(&inode->i_mapping->tree_lock); | |
402 | ||
403 | /* | |
404 | * Protected by either !I_WB_SWITCH + rcu_read_lock() or tree_lock. | |
405 | * inode_to_wb() will bark. Deref directly. | |
406 | */ | |
407 | return inode->i_wb; | |
408 | } | |
409 | ||
410 | /** | |
411 | * unlocked_inode_to_wb_end - end inode wb access transaction | |
412 | * @inode: target inode | |
413 | * @locked: *@lockedp from unlocked_inode_to_wb_begin() | |
414 | */ | |
415 | static inline void unlocked_inode_to_wb_end(struct inode *inode, bool locked) | |
416 | { | |
417 | if (unlikely(locked)) | |
418 | spin_unlock_irq(&inode->i_mapping->tree_lock); | |
419 | ||
420 | rcu_read_unlock(); | |
421 | } | |
422 | ||
423 | #else /* CONFIG_CGROUP_WRITEBACK */ | |
424 | ||
425 | static inline bool inode_cgwb_enabled(struct inode *inode) | |
426 | { | |
427 | return false; | |
428 | } | |
429 | ||
430 | static inline struct bdi_writeback_congested * | |
431 | wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp) | |
432 | { | |
433 | atomic_inc(&bdi->wb_congested->refcnt); | |
434 | return bdi->wb_congested; | |
435 | } | |
436 | ||
437 | static inline void wb_congested_put(struct bdi_writeback_congested *congested) | |
438 | { | |
439 | if (atomic_dec_and_test(&congested->refcnt)) | |
440 | kfree(congested); | |
441 | } | |
442 | ||
443 | static inline struct bdi_writeback *wb_find_current(struct backing_dev_info *bdi) | |
444 | { | |
445 | return &bdi->wb; | |
446 | } | |
447 | ||
448 | static inline struct bdi_writeback * | |
449 | wb_get_create_current(struct backing_dev_info *bdi, gfp_t gfp) | |
450 | { | |
451 | return &bdi->wb; | |
452 | } | |
453 | ||
454 | static inline bool inode_to_wb_is_valid(struct inode *inode) | |
455 | { | |
456 | return true; | |
457 | } | |
458 | ||
459 | static inline struct bdi_writeback *inode_to_wb(struct inode *inode) | |
460 | { | |
461 | return &inode_to_bdi(inode)->wb; | |
462 | } | |
463 | ||
464 | static inline struct bdi_writeback * | |
465 | unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp) | |
466 | { | |
467 | return inode_to_wb(inode); | |
468 | } | |
469 | ||
470 | static inline void unlocked_inode_to_wb_end(struct inode *inode, bool locked) | |
471 | { | |
472 | } | |
473 | ||
/* No-op when CONFIG_CGROUP_WRITEBACK is disabled. */
static inline void wb_memcg_offline(struct mem_cgroup *memcg)
{
	/* intentionally empty */
}
477 | ||
/* No-op when CONFIG_CGROUP_WRITEBACK is disabled. */
static inline void wb_blkcg_offline(struct blkcg *blkcg)
{
	/* intentionally empty */
}
481 | ||
482 | static inline int inode_congested(struct inode *inode, int cong_bits) | |
483 | { | |
484 | return wb_congested(&inode_to_bdi(inode)->wb, cong_bits); | |
485 | } | |
486 | ||
487 | #endif /* CONFIG_CGROUP_WRITEBACK */ | |
488 | ||
489 | static inline int inode_read_congested(struct inode *inode) | |
490 | { | |
491 | return inode_congested(inode, 1 << WB_sync_congested); | |
492 | } | |
493 | ||
494 | static inline int inode_write_congested(struct inode *inode) | |
495 | { | |
496 | return inode_congested(inode, 1 << WB_async_congested); | |
497 | } | |
498 | ||
499 | static inline int inode_rw_congested(struct inode *inode) | |
500 | { | |
501 | return inode_congested(inode, (1 << WB_sync_congested) | | |
502 | (1 << WB_async_congested)); | |
503 | } | |
504 | ||
505 | static inline int bdi_congested(struct backing_dev_info *bdi, int cong_bits) | |
506 | { | |
507 | return wb_congested(&bdi->wb, cong_bits); | |
508 | } | |
509 | ||
510 | static inline int bdi_read_congested(struct backing_dev_info *bdi) | |
511 | { | |
512 | return bdi_congested(bdi, 1 << WB_sync_congested); | |
513 | } | |
514 | ||
515 | static inline int bdi_write_congested(struct backing_dev_info *bdi) | |
516 | { | |
517 | return bdi_congested(bdi, 1 << WB_async_congested); | |
518 | } | |
519 | ||
520 | static inline int bdi_rw_congested(struct backing_dev_info *bdi) | |
521 | { | |
522 | return bdi_congested(bdi, (1 << WB_sync_congested) | | |
523 | (1 << WB_async_congested)); | |
524 | } | |
525 | ||
526 | #endif /* _LINUX_BACKING_DEV_H */ |