// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
 */

#include <linux/dcache.h>
#include <linux/fs.h>
#include <linux/gfp.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/srcu.h>

#include <linux/fsnotify_backend.h>
#include "fsnotify.h"

/*
 * Clear all of the marks on an inode when it is being evicted from core
 */
void __fsnotify_inode_delete(struct inode *inode)
{
	fsnotify_clear_marks_by_inode(inode);
}
EXPORT_SYMBOL_GPL(__fsnotify_inode_delete);

void __fsnotify_vfsmount_delete(struct vfsmount *mnt)
{
	fsnotify_clear_marks_by_mount(mnt);
}

/**
 * fsnotify_unmount_inodes - an sb is unmounting.  handle any watched inodes.
 * @sb: superblock being unmounted.
 *
 * Called during unmount with no locks held, so needs to be safe against
 * concurrent modifiers. We temporarily drop sb->s_inode_list_lock and CAN block.
 */
static void fsnotify_unmount_inodes(struct super_block *sb)
{
	struct inode *inode, *iput_inode = NULL;

	spin_lock(&sb->s_inode_list_lock);
	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
		/*
		 * We cannot __iget() an inode in state I_FREEING,
		 * I_WILL_FREE, or I_NEW which is fine because by that point
		 * the inode cannot have any associated watches.
		 */
		spin_lock(&inode->i_lock);
		if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) {
			spin_unlock(&inode->i_lock);
			continue;
		}

		/*
		 * If i_count is zero, the inode cannot have any watches and
		 * doing an __iget/iput with SB_ACTIVE clear would actually
		 * evict all inodes with zero i_count from icache which is
		 * unnecessarily violent and may in fact be illegal to do.
		 */
		if (!atomic_read(&inode->i_count)) {
			spin_unlock(&inode->i_lock);
			continue;
		}

		__iget(inode);
		spin_unlock(&inode->i_lock);
		spin_unlock(&sb->s_inode_list_lock);

		if (iput_inode)
			iput(iput_inode);

		/* for each watch, send FS_UNMOUNT and then remove it */
		fsnotify(inode, FS_UNMOUNT, inode, FSNOTIFY_EVENT_INODE, NULL, 0);

		fsnotify_inode_delete(inode);

		/* keep a ref so the inode stays on the sb list while we relock */
		iput_inode = inode;

		spin_lock(&sb->s_inode_list_lock);
	}
	spin_unlock(&sb->s_inode_list_lock);

	if (iput_inode)
		iput(iput_inode);

	/* Wait for outstanding inode references from connectors */
	wait_var_event(&sb->s_fsnotify_inode_refs,
		       !atomic_long_read(&sb->s_fsnotify_inode_refs));
}
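
/*
 * Tear down all fsnotify state attached to a superblock at unmount time:
 * first flush events and detach marks from any still-watched inodes, then
 * clear the marks attached to the sb object itself.
 */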
void fsnotify_sb_delete(struct super_block *sb)
{
	fsnotify_unmount_inodes(sb);
	fsnotify_clear_marks_by_sb(sb);
}

/*
 * fsnotify_nameremove - a filename was removed from a directory
 *
 * This is mostly called under parent vfs inode lock so name and
 * dentry->d_parent should be stable. However there are some corner cases where
 * inode lock is not held. So to be on the safe side and be resilient to future
 * callers and out of tree users of d_delete(), we do not assume that d_parent
 * and d_name are stable and we use dget_parent() and
 * take_dentry_name_snapshot() to grab stable references.
 */
void fsnotify_nameremove(struct dentry *dentry, int isdir)
{
	struct dentry *parent;
	struct name_snapshot name;
	__u32 mask = FS_DELETE;

	/* d_delete() of pseudo inode? (e.g. __ns_get_path() playing tricks) */
	if (IS_ROOT(dentry))
		return;

	if (isdir)
		mask |= FS_ISDIR;

	parent = dget_parent(dentry);
	/* Avoid unneeded take_dentry_name_snapshot() */
	if (!(d_inode(parent)->i_fsnotify_mask & FS_DELETE) &&
	    !(dentry->d_sb->s_fsnotify_mask & FS_DELETE))
		goto out_dput;

	take_dentry_name_snapshot(&name, dentry);

	fsnotify(d_inode(parent), mask, d_inode(dentry), FSNOTIFY_EVENT_INODE,
		 &name.name, 0);

	release_dentry_name_snapshot(&name);

out_dput:
	dput(parent);
}
EXPORT_SYMBOL(fsnotify_nameremove);
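
/*
 * Hedged usage sketch (callers live outside this file): code that has just
 * removed a name, e.g. via d_delete(), would report the deletion roughly as
 *
 *	fsnotify_nameremove(dentry, d_is_dir(dentry));
 *
 * where a nonzero isdir folds FS_ISDIR into the FS_DELETE event mask.
 */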

/*
 * Given an inode, first check if we care what happens to our children.  Inotify
 * and dnotify both tell their parents about events.  If we care about any event
 * on a child we run all of our children and set a dentry flag saying that the
 * parent cares.  Thus when an event happens on a child it can quickly tell
 * if there is a need to find a parent and send the event to the parent.
 */
void __fsnotify_update_child_dentry_flags(struct inode *inode)
{
	struct dentry *alias;
	int watched;

	if (!S_ISDIR(inode->i_mode))
		return;

	/* determine if the children should tell inode about their events */
	watched = fsnotify_inode_watches_children(inode);

	spin_lock(&inode->i_lock);
	/* run all of the dentries associated with this inode.  Since this is a
	 * directory, there damn well better only be one item on this list */
	hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
		struct dentry *child;

		/* run all of the children of the original inode and fix their
		 * d_flags to indicate parental interest (their parent is the
		 * original inode) */
		spin_lock(&alias->d_lock);
		list_for_each_entry(child, &alias->d_subdirs, d_child) {
			if (!child->d_inode)
				continue;

			spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED);
			if (watched)
				child->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED;
			else
				child->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED;
			spin_unlock(&child->d_lock);
		}
		spin_unlock(&alias->d_lock);
	}
	spin_unlock(&inode->i_lock);
}
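
/*
 * Note that the flags set above are cleared lazily: when a parent stops
 * watching, a child's DCACHE_FSNOTIFY_PARENT_WATCHED bit may stay stale
 * until __fsnotify_parent() below notices and refreshes the children.
 */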

/* Notify this dentry's parent about a child's events. */
int __fsnotify_parent(const struct path *path, struct dentry *dentry, __u32 mask)
{
	struct dentry *parent;
	struct inode *p_inode;
	int ret = 0;

	if (!dentry)
		dentry = path->dentry;

	if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED))
		return 0;

	parent = dget_parent(dentry);
	p_inode = parent->d_inode;

	if (unlikely(!fsnotify_inode_watches_children(p_inode))) {
		__fsnotify_update_child_dentry_flags(p_inode);
	} else if (p_inode->i_fsnotify_mask & mask & ALL_FSNOTIFY_EVENTS) {
		struct name_snapshot name;

		/* we are notifying a parent so come up with the new mask which
		 * specifies these are events which came from a child. */
		mask |= FS_EVENT_ON_CHILD;

		take_dentry_name_snapshot(&name, dentry);
		if (path)
			ret = fsnotify(p_inode, mask, path, FSNOTIFY_EVENT_PATH,
				       &name.name, 0);
		else
			ret = fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE,
				       &name.name, 0);
		release_dentry_name_snapshot(&name);
	}

	dput(parent);

	return ret;
}
EXPORT_SYMBOL_GPL(__fsnotify_parent);
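
/*
 * Deliver one event to one group: OR together the masks of the marks selected
 * for this iteration step and, if the event type survives the combined
 * ignored masks, hand the event to the group's ->handle_event() method.
 */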
static int send_to_group(struct inode *to_tell,
			 __u32 mask, const void *data,
			 int data_is, u32 cookie,
			 const struct qstr *file_name,
			 struct fsnotify_iter_info *iter_info)
{
	struct fsnotify_group *group = NULL;
	__u32 test_mask = (mask & ALL_FSNOTIFY_EVENTS);
	__u32 marks_mask = 0;
	__u32 marks_ignored_mask = 0;
	struct fsnotify_mark *mark;
	int type;

	if (WARN_ON(!iter_info->report_mask))
		return 0;

	/* clear ignored on inode modification */
	if (mask & FS_MODIFY) {
		fsnotify_foreach_obj_type(type) {
			if (!fsnotify_iter_should_report_type(iter_info, type))
				continue;
			mark = iter_info->marks[type];
			if (mark &&
			    !(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
				mark->ignored_mask = 0;
		}
	}

	fsnotify_foreach_obj_type(type) {
		if (!fsnotify_iter_should_report_type(iter_info, type))
			continue;
		mark = iter_info->marks[type];
		/* does the object mark tell us to do something? */
		if (mark) {
			group = mark->group;
			marks_mask |= mark->mask;
			marks_ignored_mask |= mark->ignored_mask;
		}
	}

	pr_debug("%s: group=%p to_tell=%p mask=%x marks_mask=%x marks_ignored_mask=%x"
		 " data=%p data_is=%d cookie=%d\n",
		 __func__, group, to_tell, mask, marks_mask, marks_ignored_mask,
		 data, data_is, cookie);

	if (!(test_mask & marks_mask & ~marks_ignored_mask))
		return 0;

	return group->ops->handle_event(group, to_tell, mask, data, data_is,
					file_name, cookie, iter_info);
}
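
/*
 * The mark-list walks below are protected by fsnotify_mark_srcu, hence the
 * srcu_dereference() calls; callers must hold the SRCU read lock, as
 * fsnotify() does for the duration of the iteration.
 */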
static struct fsnotify_mark *fsnotify_first_mark(struct fsnotify_mark_connector **connp)
{
	struct fsnotify_mark_connector *conn;
	struct hlist_node *node = NULL;

	conn = srcu_dereference(*connp, &fsnotify_mark_srcu);
	if (conn)
		node = srcu_dereference(conn->list.first, &fsnotify_mark_srcu);

	return hlist_entry_safe(node, struct fsnotify_mark, obj_list);
}

static struct fsnotify_mark *fsnotify_next_mark(struct fsnotify_mark *mark)
{
	struct hlist_node *node = NULL;

	if (mark)
		node = srcu_dereference(mark->obj_list.next,
					&fsnotify_mark_srcu);

	return hlist_entry_safe(node, struct fsnotify_mark, obj_list);
}

/*
 * iter_info is a multi head priority queue of marks.
 * Pick a subset of marks from queue heads, all with the
 * same group and set the report_mask for selected subset.
 * Returns the report_mask of the selected subset.
 */
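/*
 * Worked example (hypothetical marks): if the inode queue head holds a mark
 * from group A and the vfsmount head one from group B, and A sorts before B
 * per fsnotify_compare_groups(), the pass below selects A: report_mask flags
 * only the inode slot and B's mark waits for a later iteration step.
 */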
static unsigned int fsnotify_iter_select_report_types(
		struct fsnotify_iter_info *iter_info)
{
	struct fsnotify_group *max_prio_group = NULL;
	struct fsnotify_mark *mark;
	int type;

	/* Choose max prio group among groups of all queue heads */
	fsnotify_foreach_obj_type(type) {
		mark = iter_info->marks[type];
		if (mark &&
		    fsnotify_compare_groups(max_prio_group, mark->group) > 0)
			max_prio_group = mark->group;
	}

	if (!max_prio_group)
		return 0;

	/* Set the report mask for marks from same group as max prio group */
	iter_info->report_mask = 0;
	fsnotify_foreach_obj_type(type) {
		mark = iter_info->marks[type];
		if (mark &&
		    fsnotify_compare_groups(max_prio_group, mark->group) == 0)
			fsnotify_iter_set_report_type(iter_info, type);
	}

	return iter_info->report_mask;
}

/*
 * Pop from iter_info multi head queue, the marks that were iterated in the
 * current iteration step.
 */
static void fsnotify_iter_next(struct fsnotify_iter_info *iter_info)
{
	int type;

	fsnotify_foreach_obj_type(type) {
		if (fsnotify_iter_should_report_type(iter_info, type))
			iter_info->marks[type] =
				fsnotify_next_mark(iter_info->marks[type]);
	}
}

/*
 * This is the main call to fsnotify.  The VFS calls into hook specific functions
 * in linux/fsnotify.h.  Those functions then in turn call here.  Here will call
 * out to all of the registered fsnotify_group.  Those groups can then use the
 * notification event in whatever means they feel necessary.
 */
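/*
 * Hedged sketch of a typical hook (helper names vary across kernel versions;
 * see the fsnotify_path()-style wrappers in include/linux/fsnotify.h):
 *
 *	int ret = fsnotify_parent(path, NULL, mask);
 *
 *	if (ret)
 *		return ret;
 *	return fsnotify(inode, mask, path, FSNOTIFY_EVENT_PATH, NULL, 0);
 */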
int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
	     const struct qstr *file_name, u32 cookie)
{
	struct fsnotify_iter_info iter_info = {};
	struct super_block *sb = to_tell->i_sb;
	struct mount *mnt = NULL;
	__u32 mnt_or_sb_mask = sb->s_fsnotify_mask;
	int ret = 0;
	__u32 test_mask = (mask & ALL_FSNOTIFY_EVENTS);

	if (data_is == FSNOTIFY_EVENT_PATH) {
		mnt = real_mount(((const struct path *)data)->mnt);
		mnt_or_sb_mask |= mnt->mnt_fsnotify_mask;
	}
	/* An event "on child" is not intended for a mount/sb mark */
	if (mask & FS_EVENT_ON_CHILD)
		mnt_or_sb_mask = 0;

	/*
	 * Optimization: srcu_read_lock() has a memory barrier which can
	 * be expensive.  It protects walking the *_fsnotify_marks lists.
	 * However, if we do not walk the lists, we do not have to do
	 * SRCU because we have no references to any objects and do not
	 * need SRCU to keep them "alive".
	 */
	if (!to_tell->i_fsnotify_marks && !sb->s_fsnotify_marks &&
	    (!mnt || !mnt->mnt_fsnotify_marks))
		return 0;

	/*
	 * if this is a modify event we may need to clear the ignored masks
	 * otherwise return if neither the inode nor the vfsmount/sb care about
	 * this type of event.
	 */
	if (!(mask & FS_MODIFY) &&
	    !(test_mask & (to_tell->i_fsnotify_mask | mnt_or_sb_mask)))
		return 0;

	iter_info.srcu_idx = srcu_read_lock(&fsnotify_mark_srcu);

	iter_info.marks[FSNOTIFY_OBJ_TYPE_INODE] =
		fsnotify_first_mark(&to_tell->i_fsnotify_marks);
	iter_info.marks[FSNOTIFY_OBJ_TYPE_SB] =
		fsnotify_first_mark(&sb->s_fsnotify_marks);
	if (mnt) {
		iter_info.marks[FSNOTIFY_OBJ_TYPE_VFSMOUNT] =
			fsnotify_first_mark(&mnt->mnt_fsnotify_marks);
	}

	/*
	 * We need to merge inode/vfsmount/sb mark lists so that e.g. inode mark
	 * ignore masks are properly reflected for mount/sb mark notifications.
	 * That's why this traversal is so complicated...
	 */
	while (fsnotify_iter_select_report_types(&iter_info)) {
		ret = send_to_group(to_tell, mask, data, data_is, cookie,
				    file_name, &iter_info);

		if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS))
			goto out;

		fsnotify_iter_next(&iter_info);
	}
	ret = 0;
out:
	srcu_read_unlock(&fsnotify_mark_srcu, iter_info.srcu_idx);

	return ret;
}
EXPORT_SYMBOL_GPL(fsnotify);

extern struct kmem_cache *fsnotify_mark_connector_cachep;
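/* The connector cache itself is defined in fs/notify/mark.c; fsnotify_init()
 * below creates it at boot. */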

static __init int fsnotify_init(void)
{
	int ret;

	BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 25);

	ret = init_srcu_struct(&fsnotify_mark_srcu);
	if (ret)
		panic("initializing fsnotify_mark_srcu");

	fsnotify_mark_connector_cachep = KMEM_CACHE(fsnotify_mark_connector,
						    SLAB_PANIC);

	return 0;
}
core_initcall(fsnotify_init);