1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
6 #include <linux/dcache.h>
9 #include <linux/init.h>
10 #include <linux/module.h>
11 #include <linux/mount.h>
12 #include <linux/srcu.h>
14 #include <linux/fsnotify_backend.h>
18 * Clear all of the marks on an inode when it is being evicted from core
20 void __fsnotify_inode_delete(struct inode
*inode
)
22 fsnotify_clear_marks_by_inode(inode
);
24 EXPORT_SYMBOL_GPL(__fsnotify_inode_delete
);
26 void __fsnotify_vfsmount_delete(struct vfsmount
*mnt
)
28 fsnotify_clear_marks_by_mount(mnt
);
32 * fsnotify_unmount_inodes - an sb is unmounting; handle any watched inodes.
33 * @sb: superblock being unmounted.
35 * Called during unmount with no locks held, so needs to be safe against
36 * concurrent modifiers. We temporarily drop sb->s_inode_list_lock and CAN block.
38 static void fsnotify_unmount_inodes(struct super_block
*sb
)
40 struct inode
*inode
, *iput_inode
= NULL
;
42 spin_lock(&sb
->s_inode_list_lock
);
43 list_for_each_entry(inode
, &sb
->s_inodes
, i_sb_list
) {
45 * We cannot __iget() an inode in state I_FREEING,
46 * I_WILL_FREE, or I_NEW which is fine because by that point
47 * the inode cannot have any associated watches.
49 spin_lock(&inode
->i_lock
);
50 if (inode
->i_state
& (I_FREEING
|I_WILL_FREE
|I_NEW
)) {
51 spin_unlock(&inode
->i_lock
);
56 * If i_count is zero, the inode cannot have any watches and
57 * doing an __iget/iput with SB_ACTIVE clear would actually
58 * evict all inodes with zero i_count from icache which is
59 * unnecessarily violent and may in fact be illegal to do.
60 * However, we should have been called /after/ evict_inodes
61 * removed all zero refcount inodes, in any case. Test to be sure.
64 if (!atomic_read(&inode
->i_count
)) {
65 spin_unlock(&inode
->i_lock
);
70 spin_unlock(&inode
->i_lock
);
71 spin_unlock(&sb
->s_inode_list_lock
);
76 /* for each watch, send FS_UNMOUNT and then remove it */
77 fsnotify_inode(inode
, FS_UNMOUNT
);
79 fsnotify_inode_delete(inode
);
84 spin_lock(&sb
->s_inode_list_lock
);
86 spin_unlock(&sb
->s_inode_list_lock
);
92 void fsnotify_sb_delete(struct super_block
*sb
)
94 fsnotify_unmount_inodes(sb
);
95 fsnotify_clear_marks_by_sb(sb
);
96 /* Wait for outstanding object references from connectors */
97 wait_var_event(&sb
->s_fsnotify_connectors
,
98 !atomic_long_read(&sb
->s_fsnotify_connectors
));
102 * Given an inode, first check if we care what happens to our children. Inotify
103 * and dnotify both tell their parents about events. If we care about any event
104 * on a child we run all of our children and set a dentry flag saying that the
105 * parent cares. Thus when an event happens on a child it can quickly tell if
106 * there is a need to find a parent and send the event to the parent.
108 void __fsnotify_update_child_dentry_flags(struct inode
*inode
)
110 struct dentry
*alias
;
113 if (!S_ISDIR(inode
->i_mode
))
116 /* determine if the children should tell inode about their events */
117 watched
= fsnotify_inode_watches_children(inode
);
119 spin_lock(&inode
->i_lock
);
120 /* run all of the dentries associated with this inode. Since this is a
121 * directory, there damn well better only be one item on this list */
122 hlist_for_each_entry(alias
, &inode
->i_dentry
, d_u
.d_alias
) {
123 struct dentry
*child
;
125 /* run all of the children of the original inode and fix their
126 * d_flags to indicate parental interest (their parent is the original inode) */
128 spin_lock(&alias
->d_lock
);
129 list_for_each_entry(child
, &alias
->d_subdirs
, d_child
) {
133 spin_lock_nested(&child
->d_lock
, DENTRY_D_LOCK_NESTED
);
135 child
->d_flags
|= DCACHE_FSNOTIFY_PARENT_WATCHED
;
137 child
->d_flags
&= ~DCACHE_FSNOTIFY_PARENT_WATCHED
;
138 spin_unlock(&child
->d_lock
);
140 spin_unlock(&alias
->d_lock
);
142 spin_unlock(&inode
->i_lock
);
145 /* Are inode/sb/mount interested in parent and name info with this event? */
146 static bool fsnotify_event_needs_parent(struct inode
*inode
, struct mount
*mnt
,
149 __u32 marks_mask
= 0;
151 /* We only send parent/name to inode/sb/mount for events on non-dir */
156 * All events that are possible on child can also be reported with
157 * parent/name info to inode/sb/mount. Otherwise, a watching parent
158 * could result in events reported with unexpected name info to sb/mount.
160 BUILD_BUG_ON(FS_EVENTS_POSS_ON_CHILD
& ~FS_EVENTS_POSS_TO_PARENT
);
162 /* Did any of inode/sb/mount subscribe for events with parent/name? */
163 marks_mask
|= fsnotify_parent_needed_mask(inode
->i_fsnotify_mask
);
164 marks_mask
|= fsnotify_parent_needed_mask(inode
->i_sb
->s_fsnotify_mask
);
166 marks_mask
|= fsnotify_parent_needed_mask(mnt
->mnt_fsnotify_mask
);
168 /* Did they subscribe for this event with parent/name info? */
169 return mask
& marks_mask
;
173 * Notify this dentry's parent about a child's events with child name info
174 * if parent is watching or if inode/sb/mount are interested in events with
175 * parent and name info.
177 * Notify only the child without name info if parent is not watching and
178 * inode/sb/mount are not interested in events with parent and name info.
180 int __fsnotify_parent(struct dentry
*dentry
, __u32 mask
, const void *data
,
183 const struct path
*path
= fsnotify_data_path(data
, data_type
);
184 struct mount
*mnt
= path
? real_mount(path
->mnt
) : NULL
;
185 struct inode
*inode
= d_inode(dentry
);
186 struct dentry
*parent
;
187 bool parent_watched
= dentry
->d_flags
& DCACHE_FSNOTIFY_PARENT_WATCHED
;
188 bool parent_needed
, parent_interested
;
190 struct inode
*p_inode
= NULL
;
191 struct name_snapshot name
;
192 struct qstr
*file_name
= NULL
;
196 * Do inode/sb/mount care about parent and name info on non-dir?
197 * Do they care about any event at all?
199 if (!inode
->i_fsnotify_marks
&& !inode
->i_sb
->s_fsnotify_marks
&&
200 (!mnt
|| !mnt
->mnt_fsnotify_marks
) && !parent_watched
)
204 parent_needed
= fsnotify_event_needs_parent(inode
, mnt
, mask
);
205 if (!parent_watched
&& !parent_needed
)
208 /* Does parent inode care about events on children? */
209 parent
= dget_parent(dentry
);
210 p_inode
= parent
->d_inode
;
211 p_mask
= fsnotify_inode_watches_children(p_inode
);
212 if (unlikely(parent_watched
&& !p_mask
))
213 __fsnotify_update_child_dentry_flags(p_inode
);
216 * Include parent/name in notification either if some notification
217 * groups require parent info or the parent is interested in this event.
219 parent_interested
= mask
& p_mask
& ALL_FSNOTIFY_EVENTS
;
220 if (parent_needed
|| parent_interested
) {
221 /* When notifying parent, child should be passed as data */
222 WARN_ON_ONCE(inode
!= fsnotify_data_inode(data
, data_type
));
224 /* Notify both parent and child with child name info */
225 take_dentry_name_snapshot(&name
, dentry
);
226 file_name
= &name
.name
;
227 if (parent_interested
)
228 mask
|= FS_EVENT_ON_CHILD
;
232 ret
= fsnotify(mask
, data
, data_type
, p_inode
, file_name
, inode
, 0);
235 release_dentry_name_snapshot(&name
);
240 EXPORT_SYMBOL_GPL(__fsnotify_parent
);
242 static int fsnotify_handle_inode_event(struct fsnotify_group
*group
,
243 struct fsnotify_mark
*inode_mark
,
244 u32 mask
, const void *data
, int data_type
,
245 struct inode
*dir
, const struct qstr
*name
,
248 const struct path
*path
= fsnotify_data_path(data
, data_type
);
249 struct inode
*inode
= fsnotify_data_inode(data
, data_type
);
250 const struct fsnotify_ops
*ops
= group
->ops
;
252 if (WARN_ON_ONCE(!ops
->handle_inode_event
))
255 if ((inode_mark
->mask
& FS_EXCL_UNLINK
) &&
256 path
&& d_unlinked(path
->dentry
))
259 /* Check interest of this mark in case event was sent with two marks */
260 if (!(mask
& inode_mark
->mask
& ALL_FSNOTIFY_EVENTS
))
263 return ops
->handle_inode_event(inode_mark
, mask
, inode
, dir
, name
, cookie
);
266 static int fsnotify_handle_event(struct fsnotify_group
*group
, __u32 mask
,
267 const void *data
, int data_type
,
268 struct inode
*dir
, const struct qstr
*name
,
269 u32 cookie
, struct fsnotify_iter_info
*iter_info
)
271 struct fsnotify_mark
*inode_mark
= fsnotify_iter_inode_mark(iter_info
);
272 struct fsnotify_mark
*parent_mark
= fsnotify_iter_parent_mark(iter_info
);
275 if (WARN_ON_ONCE(fsnotify_iter_sb_mark(iter_info
)) ||
276 WARN_ON_ONCE(fsnotify_iter_vfsmount_mark(iter_info
)))
281 * parent_mark indicates that the parent inode is watching
282 * children and interested in this event, which is an event
283 * possible on child. But is *this mark* watching children and
284 * interested in this event?
286 if (parent_mark
->mask
& FS_EVENT_ON_CHILD
) {
287 ret
= fsnotify_handle_inode_event(group
, parent_mark
, mask
,
288 data
, data_type
, dir
, name
, 0);
296 if (mask
& FS_EVENT_ON_CHILD
) {
298 * Some events can be sent on both parent dir and child marks
299 * (e.g. FS_ATTRIB). If both parent dir and child are
300 * watching, report the event once to parent dir with name (if
301 * interested) and once to child without name (if interested).
302 * The child watcher is expecting an event without a file name
303 * and without the FS_EVENT_ON_CHILD flag.
305 mask
&= ~FS_EVENT_ON_CHILD
;
310 return fsnotify_handle_inode_event(group
, inode_mark
, mask
, data
, data_type
,
314 static int send_to_group(__u32 mask
, const void *data
, int data_type
,
315 struct inode
*dir
, const struct qstr
*file_name
,
316 u32 cookie
, struct fsnotify_iter_info
*iter_info
)
318 struct fsnotify_group
*group
= NULL
;
319 __u32 test_mask
= (mask
& ALL_FSNOTIFY_EVENTS
);
320 __u32 marks_mask
= 0;
321 __u32 marks_ignored_mask
= 0;
322 struct fsnotify_mark
*mark
;
325 if (WARN_ON(!iter_info
->report_mask
))
328 /* clear ignored on inode modification */
329 if (mask
& FS_MODIFY
) {
330 fsnotify_foreach_obj_type(type
) {
331 if (!fsnotify_iter_should_report_type(iter_info
, type
))
333 mark
= iter_info
->marks
[type
];
335 !(mark
->flags
& FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY
))
336 mark
->ignored_mask
= 0;
340 fsnotify_foreach_obj_type(type
) {
341 if (!fsnotify_iter_should_report_type(iter_info
, type
))
343 mark
= iter_info
->marks
[type
];
344 /* does the object mark tell us to do something? */
347 marks_mask
|= mark
->mask
;
348 marks_ignored_mask
|= mark
->ignored_mask
;
352 pr_debug("%s: group=%p mask=%x marks_mask=%x marks_ignored_mask=%x data=%p data_type=%d dir=%p cookie=%d\n",
353 __func__
, group
, mask
, marks_mask
, marks_ignored_mask
,
354 data
, data_type
, dir
, cookie
);
356 if (!(test_mask
& marks_mask
& ~marks_ignored_mask
))
359 if (group
->ops
->handle_event
) {
360 return group
->ops
->handle_event(group
, mask
, data
, data_type
, dir
,
361 file_name
, cookie
, iter_info
);
364 return fsnotify_handle_event(group
, mask
, data
, data_type
, dir
,
365 file_name
, cookie
, iter_info
);
368 static struct fsnotify_mark
*fsnotify_first_mark(struct fsnotify_mark_connector
**connp
)
370 struct fsnotify_mark_connector
*conn
;
371 struct hlist_node
*node
= NULL
;
373 conn
= srcu_dereference(*connp
, &fsnotify_mark_srcu
);
375 node
= srcu_dereference(conn
->list
.first
, &fsnotify_mark_srcu
);
377 return hlist_entry_safe(node
, struct fsnotify_mark
, obj_list
);
380 static struct fsnotify_mark
*fsnotify_next_mark(struct fsnotify_mark
*mark
)
382 struct hlist_node
*node
= NULL
;
385 node
= srcu_dereference(mark
->obj_list
.next
,
386 &fsnotify_mark_srcu
);
388 return hlist_entry_safe(node
, struct fsnotify_mark
, obj_list
);
392 * iter_info is a multi head priority queue of marks.
393 * Pick a subset of marks from queue heads, all with the
394 * same group and set the report_mask for selected subset.
395 * Returns the report_mask of the selected subset.
397 static unsigned int fsnotify_iter_select_report_types(
398 struct fsnotify_iter_info
*iter_info
)
400 struct fsnotify_group
*max_prio_group
= NULL
;
401 struct fsnotify_mark
*mark
;
404 /* Choose max prio group among groups of all queue heads */
405 fsnotify_foreach_obj_type(type
) {
406 mark
= iter_info
->marks
[type
];
408 fsnotify_compare_groups(max_prio_group
, mark
->group
) > 0)
409 max_prio_group
= mark
->group
;
415 /* Set the report mask for marks from same group as max prio group */
416 iter_info
->report_mask
= 0;
417 fsnotify_foreach_obj_type(type
) {
418 mark
= iter_info
->marks
[type
];
420 fsnotify_compare_groups(max_prio_group
, mark
->group
) == 0)
421 fsnotify_iter_set_report_type(iter_info
, type
);
424 return iter_info
->report_mask
;
428 * Pop from iter_info multi head queue, the marks that were iterated in the
429 * current iteration step.
431 static void fsnotify_iter_next(struct fsnotify_iter_info
*iter_info
)
435 fsnotify_foreach_obj_type(type
) {
436 if (fsnotify_iter_should_report_type(iter_info
, type
))
437 iter_info
->marks
[type
] =
438 fsnotify_next_mark(iter_info
->marks
[type
]);
443 * fsnotify - This is the main call to fsnotify.
445 * The VFS calls into hook specific functions in linux/fsnotify.h.
446 * Those functions then in turn call here. From here we call out to all of the
447 * registered fsnotify groups. Those groups can then use the notification event
448 * in whatever way they feel necessary.
450 * @mask: event type and flags
451 * @data: object that event happened on
452 * @data_type: type of object for fsnotify_data_XXX() accessors
453 * @dir: optional directory associated with event -
454 * if @file_name is not NULL, this is the directory that
455 * @file_name is relative to
456 * @file_name: optional file name associated with event
457 * @inode: optional inode associated with event -
458 * either @dir or @inode must be non-NULL.
459 * if both are non-NULL event may be reported to both.
460 * @cookie: inotify rename cookie
462 int fsnotify(__u32 mask
, const void *data
, int data_type
, struct inode
*dir
,
463 const struct qstr
*file_name
, struct inode
*inode
, u32 cookie
)
465 const struct path
*path
= fsnotify_data_path(data
, data_type
);
466 struct fsnotify_iter_info iter_info
= {};
467 struct super_block
*sb
;
468 struct mount
*mnt
= NULL
;
469 struct inode
*parent
= NULL
;
471 __u32 test_mask
, marks_mask
;
474 mnt
= real_mount(path
->mnt
);
477 /* Dirent event - report on TYPE_INODE to dir */
479 } else if (mask
& FS_EVENT_ON_CHILD
) {
481 * Event on child - report on TYPE_PARENT to dir if it is
482 * watching children and on TYPE_INODE to child.
489 * Optimization: srcu_read_lock() has a memory barrier which can
490 * be expensive. It protects walking the *_fsnotify_marks lists.
491 * However, if we do not walk the lists, we do not have to do
492 * SRCU because we have no references to any objects and do not
493 * need SRCU to keep them "alive".
495 if (!sb
->s_fsnotify_marks
&&
496 (!mnt
|| !mnt
->mnt_fsnotify_marks
) &&
497 (!inode
|| !inode
->i_fsnotify_marks
) &&
498 (!parent
|| !parent
->i_fsnotify_marks
))
501 marks_mask
= sb
->s_fsnotify_mask
;
503 marks_mask
|= mnt
->mnt_fsnotify_mask
;
505 marks_mask
|= inode
->i_fsnotify_mask
;
507 marks_mask
|= parent
->i_fsnotify_mask
;
511 * If this is a modify event we may need to clear the ignored masks;
512 * otherwise return if none of the marks care about this type of event.
514 test_mask
= (mask
& ALL_FSNOTIFY_EVENTS
);
515 if (!(mask
& FS_MODIFY
) && !(test_mask
& marks_mask
))
518 iter_info
.srcu_idx
= srcu_read_lock(&fsnotify_mark_srcu
);
520 iter_info
.marks
[FSNOTIFY_OBJ_TYPE_SB
] =
521 fsnotify_first_mark(&sb
->s_fsnotify_marks
);
523 iter_info
.marks
[FSNOTIFY_OBJ_TYPE_VFSMOUNT
] =
524 fsnotify_first_mark(&mnt
->mnt_fsnotify_marks
);
527 iter_info
.marks
[FSNOTIFY_OBJ_TYPE_INODE
] =
528 fsnotify_first_mark(&inode
->i_fsnotify_marks
);
531 iter_info
.marks
[FSNOTIFY_OBJ_TYPE_PARENT
] =
532 fsnotify_first_mark(&parent
->i_fsnotify_marks
);
536 * We need to merge inode/vfsmount/sb mark lists so that e.g. inode mark
537 * ignore masks are properly reflected for mount/sb mark notifications.
538 * That's why this traversal is so complicated...
540 while (fsnotify_iter_select_report_types(&iter_info
)) {
541 ret
= send_to_group(mask
, data
, data_type
, dir
, file_name
,
544 if (ret
&& (mask
& ALL_FSNOTIFY_PERM_EVENTS
))
547 fsnotify_iter_next(&iter_info
);
551 srcu_read_unlock(&fsnotify_mark_srcu
, iter_info
.srcu_idx
);
555 EXPORT_SYMBOL_GPL(fsnotify
);
557 static __init
int fsnotify_init(void)
561 BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS
) != 25);
563 ret
= init_srcu_struct(&fsnotify_mark_srcu
);
565 panic("initializing fsnotify_mark_srcu");
567 fsnotify_mark_connector_cachep
= KMEM_CACHE(fsnotify_mark_connector
,
572 core_initcall(fsnotify_init
);