// SPDX-License-Identifier: GPL-2.0-only
/*
 * Landlock LSM - Filesystem management and hooks
 *
 * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net>
 * Copyright © 2018-2020 ANSSI
 */
9 #include <linux/atomic.h>
10 #include <linux/bitops.h>
11 #include <linux/bits.h>
12 #include <linux/compiler_types.h>
13 #include <linux/dcache.h>
14 #include <linux/err.h>
16 #include <linux/init.h>
17 #include <linux/kernel.h>
18 #include <linux/limits.h>
19 #include <linux/list.h>
20 #include <linux/lsm_hooks.h>
21 #include <linux/mount.h>
22 #include <linux/namei.h>
23 #include <linux/path.h>
24 #include <linux/rcupdate.h>
25 #include <linux/spinlock.h>
26 #include <linux/stat.h>
27 #include <linux/types.h>
28 #include <linux/wait_bit.h>
29 #include <linux/workqueue.h>
30 #include <uapi/linux/landlock.h>
/* Underlying object management */
42 static void release_inode(struct landlock_object
*const object
)
43 __releases(object
->lock
)
45 struct inode
*const inode
= object
->underobj
;
46 struct super_block
*sb
;
49 spin_unlock(&object
->lock
);
54 * Protects against concurrent use by hook_sb_delete() of the reference
55 * to the underlying inode.
57 object
->underobj
= NULL
;
59 * Makes sure that if the filesystem is concurrently unmounted,
60 * hook_sb_delete() will wait for us to finish iput().
63 atomic_long_inc(&landlock_superblock(sb
)->inode_refs
);
64 spin_unlock(&object
->lock
);
66 * Because object->underobj was not NULL, hook_sb_delete() and
67 * get_inode_object() guarantee that it is safe to reset
68 * landlock_inode(inode)->object while it is not NULL. It is therefore
69 * not necessary to lock inode->i_lock.
71 rcu_assign_pointer(landlock_inode(inode
)->object
, NULL
);
73 * Now, new rules can safely be tied to @inode with get_inode_object().
77 if (atomic_long_dec_and_test(&landlock_superblock(sb
)->inode_refs
))
78 wake_up_var(&landlock_superblock(sb
)->inode_refs
);
81 static const struct landlock_object_underops landlock_fs_underops
= {
82 .release
= release_inode
/* Ruleset management */
87 static struct landlock_object
*get_inode_object(struct inode
*const inode
)
89 struct landlock_object
*object
, *new_object
;
90 struct landlock_inode_security
*inode_sec
= landlock_inode(inode
);
94 object
= rcu_dereference(inode_sec
->object
);
96 if (likely(refcount_inc_not_zero(&object
->usage
))) {
101 * We are racing with release_inode(), the object is going
102 * away. Wait for release_inode(), then retry.
104 spin_lock(&object
->lock
);
105 spin_unlock(&object
->lock
);
111 * If there is no object tied to @inode, then create a new one (without
112 * holding any locks).
114 new_object
= landlock_create_object(&landlock_fs_underops
, inode
);
115 if (IS_ERR(new_object
))
119 * Protects against concurrent calls to get_inode_object() or
122 spin_lock(&inode
->i_lock
);
123 if (unlikely(rcu_access_pointer(inode_sec
->object
))) {
124 /* Someone else just created the object, bail out and retry. */
125 spin_unlock(&inode
->i_lock
);
133 * @inode will be released by hook_sb_delete() on its superblock
134 * shutdown, or by release_inode() when no more ruleset references the
138 rcu_assign_pointer(inode_sec
->object
, new_object
);
139 spin_unlock(&inode
->i_lock
);
/* All access rights that can be tied to files. */
#define ACCESS_FILE ( \
	LANDLOCK_ACCESS_FS_EXECUTE | \
	LANDLOCK_ACCESS_FS_WRITE_FILE | \
	LANDLOCK_ACCESS_FS_READ_FILE)
150 * @path: Should have been checked by get_path_from_fd().
152 int landlock_append_fs_rule(struct landlock_ruleset
*const ruleset
,
153 const struct path
*const path
, u32 access_rights
)
156 struct landlock_object
*object
;
158 /* Files only get access rights that make sense. */
159 if (!d_is_dir(path
->dentry
) && (access_rights
| ACCESS_FILE
) !=
162 if (WARN_ON_ONCE(ruleset
->num_layers
!= 1))
165 /* Transforms relative access rights to absolute ones. */
166 access_rights
|= LANDLOCK_MASK_ACCESS_FS
& ~ruleset
->fs_access_masks
[0];
167 object
= get_inode_object(d_backing_inode(path
->dentry
));
169 return PTR_ERR(object
);
170 mutex_lock(&ruleset
->lock
);
171 err
= landlock_insert_rule(ruleset
, object
, access_rights
);
172 mutex_unlock(&ruleset
->lock
);
174 * No need to check for an error because landlock_insert_rule()
175 * increments the refcount for the new object if needed.
177 landlock_put_object(object
);
/* Access-control management */
183 static inline u64
unmask_layers(
184 const struct landlock_ruleset
*const domain
,
185 const struct path
*const path
, const u32 access_request
,
188 const struct landlock_rule
*rule
;
189 const struct inode
*inode
;
192 if (d_is_negative(path
->dentry
))
193 /* Ignore nonexistent leafs. */
195 inode
= d_backing_inode(path
->dentry
);
197 rule
= landlock_find_rule(domain
,
198 rcu_dereference(landlock_inode(inode
)->object
));
204 * An access is granted if, for each policy layer, at least one rule
205 * encountered on the pathwalk grants the requested accesses,
206 * regardless of their position in the layer stack. We must then check
207 * the remaining layers for each inode, from the first added layer to
210 for (i
= 0; i
< rule
->num_layers
; i
++) {
211 const struct landlock_layer
*const layer
= &rule
->layers
[i
];
212 const u64 layer_level
= BIT_ULL(layer
->level
- 1);
214 /* Checks that the layer grants access to the full request. */
215 if ((layer
->access
& access_request
) == access_request
) {
216 layer_mask
&= ~layer_level
;
225 static int check_access_path(const struct landlock_ruleset
*const domain
,
226 const struct path
*const path
, u32 access_request
)
228 bool allowed
= false;
229 struct path walker_path
;
233 /* Make sure all layers can be checked. */
234 BUILD_BUG_ON(BITS_PER_TYPE(layer_mask
) < LANDLOCK_MAX_NUM_LAYERS
);
238 if (WARN_ON_ONCE(!domain
|| !path
))
241 * Allows access to pseudo filesystems that will never be mountable
242 * (e.g. sockfs, pipefs), but can still be reachable through
243 * /proc/<pid>/fd/<file-descriptor> .
245 if ((path
->dentry
->d_sb
->s_flags
& SB_NOUSER
) ||
246 (d_is_positive(path
->dentry
) &&
247 unlikely(IS_PRIVATE(d_backing_inode(path
->dentry
)))))
249 if (WARN_ON_ONCE(domain
->num_layers
< 1))
252 /* Saves all layers handling a subset of requested accesses. */
254 for (i
= 0; i
< domain
->num_layers
; i
++) {
255 if (domain
->fs_access_masks
[i
] & access_request
)
256 layer_mask
|= BIT_ULL(i
);
258 /* An access request not handled by the domain is allowed. */
263 path_get(&walker_path
);
265 * We need to walk through all the hierarchy to not miss any relevant
269 struct dentry
*parent_dentry
;
271 layer_mask
= unmask_layers(domain
, &walker_path
,
272 access_request
, layer_mask
);
273 if (layer_mask
== 0) {
274 /* Stops when a rule from each layer grants access. */
280 if (walker_path
.dentry
== walker_path
.mnt
->mnt_root
) {
281 if (follow_up(&walker_path
)) {
282 /* Ignores hidden mount points. */
286 * Stops at the real root. Denies access
287 * because not all layers have granted access.
293 if (unlikely(IS_ROOT(walker_path
.dentry
))) {
295 * Stops at disconnected root directories. Only allows
296 * access to internal filesystems (e.g. nsfs, which is
297 * reachable through /proc/<pid>/ns/<namespace>).
299 allowed
= !!(walker_path
.mnt
->mnt_flags
& MNT_INTERNAL
);
302 parent_dentry
= dget_parent(walker_path
.dentry
);
303 dput(walker_path
.dentry
);
304 walker_path
.dentry
= parent_dentry
;
306 path_put(&walker_path
);
307 return allowed
? 0 : -EACCES
;
310 static inline int current_check_access_path(const struct path
*const path
,
311 const u32 access_request
)
313 const struct landlock_ruleset
*const dom
=
314 landlock_get_current_domain();
318 return check_access_path(dom
, path
, access_request
);
323 static void hook_inode_free_security(struct inode
*const inode
)
326 * All inodes must already have been untied from their object by
327 * release_inode() or hook_sb_delete().
329 WARN_ON_ONCE(landlock_inode(inode
)->object
);
/* Super-block hooks */
335 * Release the inodes used in a security policy.
337 * Cf. fsnotify_unmount_inodes() and invalidate_inodes()
339 static void hook_sb_delete(struct super_block
*const sb
)
341 struct inode
*inode
, *prev_inode
= NULL
;
343 if (!landlock_initialized
)
346 spin_lock(&sb
->s_inode_list_lock
);
347 list_for_each_entry(inode
, &sb
->s_inodes
, i_sb_list
) {
348 struct landlock_object
*object
;
350 /* Only handles referenced inodes. */
351 if (!atomic_read(&inode
->i_count
))
355 * Protects against concurrent modification of inode (e.g.
356 * from get_inode_object()).
358 spin_lock(&inode
->i_lock
);
360 * Checks I_FREEING and I_WILL_FREE to protect against a race
361 * condition when release_inode() just called iput(), which
362 * could lead to a NULL dereference of inode->security or a
363 * second call to iput() for the same Landlock object. Also
364 * checks I_NEW because such inode cannot be tied to an object.
366 if (inode
->i_state
& (I_FREEING
| I_WILL_FREE
| I_NEW
)) {
367 spin_unlock(&inode
->i_lock
);
372 object
= rcu_dereference(landlock_inode(inode
)->object
);
375 spin_unlock(&inode
->i_lock
);
378 /* Keeps a reference to this inode until the next loop walk. */
380 spin_unlock(&inode
->i_lock
);
383 * If there is no concurrent release_inode() ongoing, then we
384 * are in charge of calling iput() on this inode, otherwise we
385 * will just wait for it to finish.
387 spin_lock(&object
->lock
);
388 if (object
->underobj
== inode
) {
389 object
->underobj
= NULL
;
390 spin_unlock(&object
->lock
);
394 * Because object->underobj was not NULL,
395 * release_inode() and get_inode_object() guarantee
396 * that it is safe to reset
397 * landlock_inode(inode)->object while it is not NULL.
398 * It is therefore not necessary to lock inode->i_lock.
400 rcu_assign_pointer(landlock_inode(inode
)->object
, NULL
);
402 * At this point, we own the ihold() reference that was
403 * originally set up by get_inode_object() and the
404 * __iget() reference that we just set in this loop
405 * walk. Therefore the following call to iput() will
406 * not sleep nor drop the inode because there is now at
407 * least two references to it.
411 spin_unlock(&object
->lock
);
417 * At this point, we still own the __iget() reference
418 * that we just set in this loop walk. Therefore we
419 * can drop the list lock and know that the inode won't
420 * disappear from under us until the next loop walk.
422 spin_unlock(&sb
->s_inode_list_lock
);
424 * We can now actually put the inode reference from the
425 * previous loop walk, which is not needed anymore.
429 spin_lock(&sb
->s_inode_list_lock
);
433 spin_unlock(&sb
->s_inode_list_lock
);
435 /* Puts the inode reference from the last loop walk, if any. */
438 /* Waits for pending iput() in release_inode(). */
439 wait_var_event(&landlock_superblock(sb
)->inode_refs
, !atomic_long_read(
440 &landlock_superblock(sb
)->inode_refs
));
/*
 * Because a Landlock security policy is defined according to the filesystem
 * topology (i.e. the mount namespace), changing it may grant access to files
 * not previously allowed.
 *
 * To make it simple, deny any filesystem topology modification by landlocked
 * processes.  Non-landlocked processes may still change the namespace of a
 * landlocked process, but this kind of threat must be handled by a system-wide
 * access-control security policy.
 *
 * This could be lifted in the future if Landlock can safely handle mount
 * namespace updates requested by a landlocked process.  Indeed, we could
 * update the current domain (which is currently read-only) by taking into
 * account the accesses of the source and the destination of a new mount point.
 * However, it would also require making all the child domains dynamically
 * inherit these new constraints.  Anyway, for backward compatibility reasons,
 * a dedicated user space option would be required (e.g. as a ruleset flag).
 */
461 static int hook_sb_mount(const char *const dev_name
,
462 const struct path
*const path
, const char *const type
,
463 const unsigned long flags
, void *const data
)
465 if (!landlock_get_current_domain())
470 static int hook_move_mount(const struct path
*const from_path
,
471 const struct path
*const to_path
)
473 if (!landlock_get_current_domain())
/*
 * Removing a mount point may reveal a previously hidden file hierarchy, which
 * may then grant access to files, which may have previously been forbidden.
 */
482 static int hook_sb_umount(struct vfsmount
*const mnt
, const int flags
)
484 if (!landlock_get_current_domain())
489 static int hook_sb_remount(struct super_block
*const sb
, void *const mnt_opts
)
491 if (!landlock_get_current_domain())
/*
 * pivot_root(2), like mount(2), changes the current mount namespace.  It must
 * then be forbidden for a landlocked process.
 *
 * However, chroot(2) may be allowed because it only changes the relative root
 * directory of the current process.  Moreover, it can be used to restrict the
 * view of the filesystem.
 */
504 static int hook_sb_pivotroot(const struct path
*const old_path
,
505 const struct path
*const new_path
)
507 if (!landlock_get_current_domain())
514 static inline u32
get_mode_access(const umode_t mode
)
516 switch (mode
& S_IFMT
) {
518 return LANDLOCK_ACCESS_FS_MAKE_SYM
;
520 /* A zero mode translates to S_IFREG. */
522 return LANDLOCK_ACCESS_FS_MAKE_REG
;
524 return LANDLOCK_ACCESS_FS_MAKE_DIR
;
526 return LANDLOCK_ACCESS_FS_MAKE_CHAR
;
528 return LANDLOCK_ACCESS_FS_MAKE_BLOCK
;
530 return LANDLOCK_ACCESS_FS_MAKE_FIFO
;
532 return LANDLOCK_ACCESS_FS_MAKE_SOCK
;
540 * Creating multiple links or renaming may lead to privilege escalations if not
541 * handled properly. Indeed, we must be sure that the source doesn't gain more
542 * privileges by being accessible from the destination. This is getting more
543 * complex when dealing with multiple layers. The whole picture can be seen as
544 * a multilayer partial ordering problem. A future version of Landlock will
547 static int hook_path_link(struct dentry
*const old_dentry
,
548 const struct path
*const new_dir
,
549 struct dentry
*const new_dentry
)
551 const struct landlock_ruleset
*const dom
=
552 landlock_get_current_domain();
556 /* The mount points are the same for old and new paths, cf. EXDEV. */
557 if (old_dentry
->d_parent
!= new_dir
->dentry
)
558 /* Gracefully forbids reparenting. */
560 if (unlikely(d_is_negative(old_dentry
)))
562 return check_access_path(dom
, new_dir
,
563 get_mode_access(d_backing_inode(old_dentry
)->i_mode
));
566 static inline u32
maybe_remove(const struct dentry
*const dentry
)
568 if (d_is_negative(dentry
))
570 return d_is_dir(dentry
) ? LANDLOCK_ACCESS_FS_REMOVE_DIR
:
571 LANDLOCK_ACCESS_FS_REMOVE_FILE
;
574 static int hook_path_rename(const struct path
*const old_dir
,
575 struct dentry
*const old_dentry
,
576 const struct path
*const new_dir
,
577 struct dentry
*const new_dentry
)
579 const struct landlock_ruleset
*const dom
=
580 landlock_get_current_domain();
584 /* The mount points are the same for old and new paths, cf. EXDEV. */
585 if (old_dir
->dentry
!= new_dir
->dentry
)
586 /* Gracefully forbids reparenting. */
588 if (unlikely(d_is_negative(old_dentry
)))
590 /* RENAME_EXCHANGE is handled because directories are the same. */
591 return check_access_path(dom
, old_dir
, maybe_remove(old_dentry
) |
592 maybe_remove(new_dentry
) |
593 get_mode_access(d_backing_inode(old_dentry
)->i_mode
));
596 static int hook_path_mkdir(const struct path
*const dir
,
597 struct dentry
*const dentry
, const umode_t mode
)
599 return current_check_access_path(dir
, LANDLOCK_ACCESS_FS_MAKE_DIR
);
602 static int hook_path_mknod(const struct path
*const dir
,
603 struct dentry
*const dentry
, const umode_t mode
,
604 const unsigned int dev
)
606 const struct landlock_ruleset
*const dom
=
607 landlock_get_current_domain();
611 return check_access_path(dom
, dir
, get_mode_access(mode
));
614 static int hook_path_symlink(const struct path
*const dir
,
615 struct dentry
*const dentry
, const char *const old_name
)
617 return current_check_access_path(dir
, LANDLOCK_ACCESS_FS_MAKE_SYM
);
620 static int hook_path_unlink(const struct path
*const dir
,
621 struct dentry
*const dentry
)
623 return current_check_access_path(dir
, LANDLOCK_ACCESS_FS_REMOVE_FILE
);
626 static int hook_path_rmdir(const struct path
*const dir
,
627 struct dentry
*const dentry
)
629 return current_check_access_path(dir
, LANDLOCK_ACCESS_FS_REMOVE_DIR
);
634 static inline u32
get_file_access(const struct file
*const file
)
638 if (file
->f_mode
& FMODE_READ
) {
639 /* A directory can only be opened in read mode. */
640 if (S_ISDIR(file_inode(file
)->i_mode
))
641 return LANDLOCK_ACCESS_FS_READ_DIR
;
642 access
= LANDLOCK_ACCESS_FS_READ_FILE
;
644 if (file
->f_mode
& FMODE_WRITE
)
645 access
|= LANDLOCK_ACCESS_FS_WRITE_FILE
;
646 /* __FMODE_EXEC is indeed part of f_flags, not f_mode. */
647 if (file
->f_flags
& __FMODE_EXEC
)
648 access
|= LANDLOCK_ACCESS_FS_EXECUTE
;
652 static int hook_file_open(struct file
*const file
)
654 const struct landlock_ruleset
*const dom
=
655 landlock_get_current_domain();
660 * Because a file may be opened with O_PATH, get_file_access() may
661 * return 0. This case will be handled with a future Landlock
664 return check_access_path(dom
, &file
->f_path
, get_file_access(file
));
667 static struct security_hook_list landlock_hooks
[] __lsm_ro_after_init
= {
668 LSM_HOOK_INIT(inode_free_security
, hook_inode_free_security
),
670 LSM_HOOK_INIT(sb_delete
, hook_sb_delete
),
671 LSM_HOOK_INIT(sb_mount
, hook_sb_mount
),
672 LSM_HOOK_INIT(move_mount
, hook_move_mount
),
673 LSM_HOOK_INIT(sb_umount
, hook_sb_umount
),
674 LSM_HOOK_INIT(sb_remount
, hook_sb_remount
),
675 LSM_HOOK_INIT(sb_pivotroot
, hook_sb_pivotroot
),
677 LSM_HOOK_INIT(path_link
, hook_path_link
),
678 LSM_HOOK_INIT(path_rename
, hook_path_rename
),
679 LSM_HOOK_INIT(path_mkdir
, hook_path_mkdir
),
680 LSM_HOOK_INIT(path_mknod
, hook_path_mknod
),
681 LSM_HOOK_INIT(path_symlink
, hook_path_symlink
),
682 LSM_HOOK_INIT(path_unlink
, hook_path_unlink
),
683 LSM_HOOK_INIT(path_rmdir
, hook_path_rmdir
),
685 LSM_HOOK_INIT(file_open
, hook_file_open
),
688 __init
void landlock_add_fs_hooks(void)
690 security_add_hooks(landlock_hooks
, ARRAY_SIZE(landlock_hooks
),