1 // SPDX-License-Identifier: GPL-2.0
5 * Copyright (C) 1991, 1992 Linus Torvalds
8 #include <linux/syscalls.h>
10 #include <linux/capability.h>
11 #include <linux/compat.h>
12 #include <linux/file.h>
14 #include <linux/security.h>
15 #include <linux/export.h>
16 #include <linux/uaccess.h>
17 #include <linux/writeback.h>
18 #include <linux/buffer_head.h>
19 #include <linux/falloc.h>
20 #include <linux/sched/signal.h>
24 #include <asm/ioctls.h>
26 /* So that the fiemap access checks can't overflow on 32 bit machines. */
27 #define FIEMAP_MAX_EXTENTS (UINT_MAX / sizeof(struct fiemap_extent))
30 * vfs_ioctl - call filesystem specific ioctl methods
31 * @filp: open file to invoke ioctl method on
32 * @cmd: ioctl command to execute
33 * @arg: command-specific argument for ioctl
35 * Invokes filesystem specific ->unlocked_ioctl, if one exists; otherwise
38 * Returns 0 on success, -errno on error.
40 long vfs_ioctl(struct file
*filp
, unsigned int cmd
, unsigned long arg
)
44 if (!filp
->f_op
->unlocked_ioctl
)
47 error
= filp
->f_op
->unlocked_ioctl(filp
, cmd
, arg
);
48 if (error
== -ENOIOCTLCMD
)
53 EXPORT_SYMBOL(vfs_ioctl
);
55 static int ioctl_fibmap(struct file
*filp
, int __user
*p
)
57 struct inode
*inode
= file_inode(filp
);
58 struct super_block
*sb
= inode
->i_sb
;
62 if (!capable(CAP_SYS_RAWIO
))
65 error
= get_user(ur_block
, p
);
73 error
= bmap(inode
, &block
);
75 if (block
> INT_MAX
) {
77 pr_warn_ratelimited("[%s/%d] FS: %s File: %pD4 would truncate fibmap result\n",
78 current
->comm
, task_pid_nr(current
),
87 if (put_user(ur_block
, p
))
94 * fiemap_fill_next_extent - Fiemap helper function
95 * @fieinfo: Fiemap context passed into ->fiemap
96 * @logical: Extent logical start offset, in bytes
97 * @phys: Extent physical start offset, in bytes
98 * @len: Extent length, in bytes
99 * @flags: FIEMAP_EXTENT flags that describe this extent
101 * Called from file system ->fiemap callback. Will populate extent
102 * info as passed in via arguments and copy to user memory. On
103 * success, extent count on fieinfo is incremented.
105 * Returns 0 on success, -errno on error, 1 if this was the last
106 * extent that will fit in user array.
/*
 * Extent flags that imply FIEMAP_EXTENT_UNKNOWN: fiemap_fill_next_extent()
 * ORs FIEMAP_EXTENT_UNKNOWN into the reported flags when any of these is set.
 */
108 #define SET_UNKNOWN_FLAGS (FIEMAP_EXTENT_DELALLOC)
/*
 * Extent flags that imply FIEMAP_EXTENT_ENCODED: fiemap_fill_next_extent()
 * ORs FIEMAP_EXTENT_ENCODED into the reported flags when any of these is set.
 */
109 #define SET_NO_UNMOUNTED_IO_FLAGS (FIEMAP_EXTENT_DATA_ENCRYPTED)
/*
 * Extent flags that imply FIEMAP_EXTENT_NOT_ALIGNED: fiemap_fill_next_extent()
 * ORs FIEMAP_EXTENT_NOT_ALIGNED into the reported flags when any of these is
 * set.
 */
110 #define SET_NOT_ALIGNED_FLAGS (FIEMAP_EXTENT_DATA_TAIL|FIEMAP_EXTENT_DATA_INLINE)
111 int fiemap_fill_next_extent(struct fiemap_extent_info
*fieinfo
, u64 logical
,
112 u64 phys
, u64 len
, u32 flags
)
114 struct fiemap_extent extent
;
115 struct fiemap_extent __user
*dest
= fieinfo
->fi_extents_start
;
117 /* only count the extents */
118 if (fieinfo
->fi_extents_max
== 0) {
119 fieinfo
->fi_extents_mapped
++;
120 return (flags
& FIEMAP_EXTENT_LAST
) ? 1 : 0;
123 if (fieinfo
->fi_extents_mapped
>= fieinfo
->fi_extents_max
)
126 if (flags
& SET_UNKNOWN_FLAGS
)
127 flags
|= FIEMAP_EXTENT_UNKNOWN
;
128 if (flags
& SET_NO_UNMOUNTED_IO_FLAGS
)
129 flags
|= FIEMAP_EXTENT_ENCODED
;
130 if (flags
& SET_NOT_ALIGNED_FLAGS
)
131 flags
|= FIEMAP_EXTENT_NOT_ALIGNED
;
133 memset(&extent
, 0, sizeof(extent
));
134 extent
.fe_logical
= logical
;
135 extent
.fe_physical
= phys
;
136 extent
.fe_length
= len
;
137 extent
.fe_flags
= flags
;
139 dest
+= fieinfo
->fi_extents_mapped
;
140 if (copy_to_user(dest
, &extent
, sizeof(extent
)))
143 fieinfo
->fi_extents_mapped
++;
144 if (fieinfo
->fi_extents_mapped
== fieinfo
->fi_extents_max
)
146 return (flags
& FIEMAP_EXTENT_LAST
) ? 1 : 0;
148 EXPORT_SYMBOL(fiemap_fill_next_extent
);
151 * fiemap_check_flags - check validity of requested flags for fiemap
152 * @fieinfo: Fiemap context passed into ->fiemap
153 * @fs_flags: Set of fiemap flags that the file system understands
155 * Called from file system ->fiemap callback. This will compute the
156 * intersection of valid fiemap flags and those that the fs supports. That
157 * value is then compared against the user supplied flags. In case of bad user
158 * flags, the invalid values will be written into the fieinfo structure, and
159 * -EBADR is returned, which tells ioctl_fiemap() to return those values to
160 * userspace. For this reason, a return code of -EBADR should be preserved.
162 * Returns 0 on success, -EBADR on bad flags.
164 int fiemap_check_flags(struct fiemap_extent_info
*fieinfo
, u32 fs_flags
)
168 incompat_flags
= fieinfo
->fi_flags
& ~(FIEMAP_FLAGS_COMPAT
& fs_flags
);
169 if (incompat_flags
) {
170 fieinfo
->fi_flags
= incompat_flags
;
175 EXPORT_SYMBOL(fiemap_check_flags
);
177 static int fiemap_check_ranges(struct super_block
*sb
,
178 u64 start
, u64 len
, u64
*new_len
)
180 u64 maxbytes
= (u64
) sb
->s_maxbytes
;
187 if (start
> maxbytes
)
191 * Shrink request scope to what the fs can actually handle.
193 if (len
> maxbytes
|| (maxbytes
- len
) < start
)
194 *new_len
= maxbytes
- start
;
199 static int ioctl_fiemap(struct file
*filp
, struct fiemap __user
*ufiemap
)
201 struct fiemap fiemap
;
202 struct fiemap_extent_info fieinfo
= { 0, };
203 struct inode
*inode
= file_inode(filp
);
204 struct super_block
*sb
= inode
->i_sb
;
208 if (!inode
->i_op
->fiemap
)
211 if (copy_from_user(&fiemap
, ufiemap
, sizeof(fiemap
)))
214 if (fiemap
.fm_extent_count
> FIEMAP_MAX_EXTENTS
)
217 error
= fiemap_check_ranges(sb
, fiemap
.fm_start
, fiemap
.fm_length
,
222 fieinfo
.fi_flags
= fiemap
.fm_flags
;
223 fieinfo
.fi_extents_max
= fiemap
.fm_extent_count
;
224 fieinfo
.fi_extents_start
= ufiemap
->fm_extents
;
226 if (fiemap
.fm_extent_count
!= 0 &&
227 !access_ok(fieinfo
.fi_extents_start
,
228 fieinfo
.fi_extents_max
* sizeof(struct fiemap_extent
)))
231 if (fieinfo
.fi_flags
& FIEMAP_FLAG_SYNC
)
232 filemap_write_and_wait(inode
->i_mapping
);
234 error
= inode
->i_op
->fiemap(inode
, &fieinfo
, fiemap
.fm_start
, len
);
235 fiemap
.fm_flags
= fieinfo
.fi_flags
;
236 fiemap
.fm_mapped_extents
= fieinfo
.fi_extents_mapped
;
237 if (copy_to_user(ufiemap
, &fiemap
, sizeof(fiemap
)))
243 static long ioctl_file_clone(struct file
*dst_file
, unsigned long srcfd
,
244 u64 off
, u64 olen
, u64 destoff
)
246 struct fd src_file
= fdget(srcfd
);
253 if (src_file
.file
->f_path
.mnt
!= dst_file
->f_path
.mnt
)
255 cloned
= vfs_clone_file_range(src_file
.file
, off
, dst_file
, destoff
,
259 else if (olen
&& cloned
!= olen
)
268 static long ioctl_file_clone_range(struct file
*file
,
269 struct file_clone_range __user
*argp
)
271 struct file_clone_range args
;
273 if (copy_from_user(&args
, argp
, sizeof(args
)))
275 return ioctl_file_clone(file
, args
.src_fd
, args
.src_offset
,
276 args
.src_length
, args
.dest_offset
);
281 static inline sector_t
logical_to_blk(struct inode
*inode
, loff_t offset
)
283 return (offset
>> inode
->i_blkbits
);
286 static inline loff_t
blk_to_logical(struct inode
*inode
, sector_t blk
)
288 return (blk
<< inode
->i_blkbits
);
292 * __generic_block_fiemap - FIEMAP for block based inodes (no locking)
293 * @inode: the inode to map
294 * @fieinfo: the fiemap info struct that will be passed back to userspace
295 * @start: where to start mapping in the inode
296 * @len: how much space to map
297 * @get_block: the fs's get_block function
299 * This does FIEMAP for block based inodes. Basically it will just loop
300 * through get_block until we hit the number of extents we want to map, or we
301 * go past the end of the file and hit a hole.
303 * If it is possible to have data blocks beyond a hole past @inode->i_size, then
304 * please do not use this function, it will stop at the first unmapped block
307 * If you use this function directly, you need to do your own locking. Use
308 * generic_block_fiemap if you want the locking done for you.
311 int __generic_block_fiemap(struct inode
*inode
,
312 struct fiemap_extent_info
*fieinfo
, loff_t start
,
313 loff_t len
, get_block_t
*get_block
)
315 struct buffer_head map_bh
;
316 sector_t start_blk
, last_blk
;
317 loff_t isize
= i_size_read(inode
);
318 u64 logical
= 0, phys
= 0, size
= 0;
319 u32 flags
= FIEMAP_EXTENT_MERGED
;
320 bool past_eof
= false, whole_file
= false;
323 ret
= fiemap_check_flags(fieinfo
, FIEMAP_FLAG_SYNC
);
328 * Either the i_mutex or other appropriate locking needs to be held
329 * since we expect isize to not change at all through the duration of
338 * Some filesystems can't deal with being asked to map less than
339 * blocksize, so make sure our len is at least block length.
341 if (logical_to_blk(inode
, len
) == 0)
342 len
= blk_to_logical(inode
, 1);
344 start_blk
= logical_to_blk(inode
, start
);
345 last_blk
= logical_to_blk(inode
, start
+ len
- 1);
349 * we set b_size to the total size we want so it will map as
350 * many contiguous blocks as possible at once
352 memset(&map_bh
, 0, sizeof(struct buffer_head
));
355 ret
= get_block(inode
, start_blk
, &map_bh
, 0);
360 if (!buffer_mapped(&map_bh
)) {
364 * We want to handle the case where there is an
365 * allocated block at the front of the file, and then
366 * nothing but holes up to the end of the file properly,
367 * to make sure that extent at the front gets properly
368 * marked with FIEMAP_EXTENT_LAST
371 blk_to_logical(inode
, start_blk
) >= isize
)
375 * First hole after going past the EOF, this is our
378 if (past_eof
&& size
) {
379 flags
= FIEMAP_EXTENT_MERGED
|FIEMAP_EXTENT_LAST
;
380 ret
= fiemap_fill_next_extent(fieinfo
, logical
,
384 ret
= fiemap_fill_next_extent(fieinfo
, logical
,
389 /* if we have holes up to/past EOF then we're done */
390 if (start_blk
> last_blk
|| past_eof
|| ret
)
394 * We have gone over the length of what we wanted to
395 * map, and it wasn't the entire file, so add the extent
396 * we got last time and exit.
398 * This is for the case where say we want to map all the
399 * way up to the second to the last block in a file, but
400 * the last block is a hole, making the second to last
401 * block FIEMAP_EXTENT_LAST. In this case we want to
402 * see if there is a hole after the second to last block
403 * so we can mark it properly. If we found data after
404 * we exceeded the length we were requesting, then we
405 * are good to go, just add the extent to the fieinfo
408 if (start_blk
> last_blk
&& !whole_file
) {
409 ret
= fiemap_fill_next_extent(fieinfo
, logical
,
416 * if size != 0 then we know we already have an extent
420 ret
= fiemap_fill_next_extent(fieinfo
, logical
,
427 logical
= blk_to_logical(inode
, start_blk
);
428 phys
= blk_to_logical(inode
, map_bh
.b_blocknr
);
429 size
= map_bh
.b_size
;
430 flags
= FIEMAP_EXTENT_MERGED
;
432 start_blk
+= logical_to_blk(inode
, size
);
435 * If we are past the EOF, then we need to make sure as
436 * soon as we find a hole that the last extent we found
437 * is marked with FIEMAP_EXTENT_LAST
439 if (!past_eof
&& logical
+ size
>= isize
)
443 if (fatal_signal_pending(current
)) {
450 /* If ret is 1 then we just hit the end of the extent array */
456 EXPORT_SYMBOL(__generic_block_fiemap
);
459 * generic_block_fiemap - FIEMAP for block based inodes
460 * @inode: The inode to map
461 * @fieinfo: The mapping information
462 * @start: The byte offset in the file at which to start mapping
463 * @len: The length of the extent to attempt to map, in bytes
464 * @get_block: The block mapping function for the fs
466 * Calls __generic_block_fiemap to map the inode, after taking
467 * the inode's mutex lock.
470 int generic_block_fiemap(struct inode
*inode
,
471 struct fiemap_extent_info
*fieinfo
, u64 start
,
472 u64 len
, get_block_t
*get_block
)
476 ret
= __generic_block_fiemap(inode
, fieinfo
, start
, len
, get_block
);
480 EXPORT_SYMBOL(generic_block_fiemap
);
482 #endif /* CONFIG_BLOCK */
485 * This provides compatibility with legacy XFS pre-allocation ioctls
486 * which predate the fallocate syscall.
488 * Only the l_start, l_len and l_whence fields of the 'struct space_resv'
489 * are used here, rest are ignored.
491 static int ioctl_preallocate(struct file
*filp
, int mode
, void __user
*argp
)
493 struct inode
*inode
= file_inode(filp
);
494 struct space_resv sr
;
496 if (copy_from_user(&sr
, argp
, sizeof(sr
)))
499 switch (sr
.l_whence
) {
503 sr
.l_start
+= filp
->f_pos
;
506 sr
.l_start
+= i_size_read(inode
);
512 return vfs_fallocate(filp
, mode
| FALLOC_FL_KEEP_SIZE
, sr
.l_start
,
516 /* on ia32 l_start is on a 32-bit boundary */
517 #if defined CONFIG_COMPAT && defined(CONFIG_X86_64)
518 /* just account for different alignment */
519 static int compat_ioctl_preallocate(struct file
*file
, int mode
,
520 struct space_resv_32 __user
*argp
)
522 struct inode
*inode
= file_inode(file
);
523 struct space_resv_32 sr
;
525 if (copy_from_user(&sr
, argp
, sizeof(sr
)))
528 switch (sr
.l_whence
) {
532 sr
.l_start
+= file
->f_pos
;
535 sr
.l_start
+= i_size_read(inode
);
541 return vfs_fallocate(file
, mode
| FALLOC_FL_KEEP_SIZE
, sr
.l_start
, sr
.l_len
);
545 static int file_ioctl(struct file
*filp
, unsigned int cmd
, int __user
*p
)
549 return ioctl_fibmap(filp
, p
);
551 case FS_IOC_RESVSP64
:
552 return ioctl_preallocate(filp
, 0, p
);
553 case FS_IOC_UNRESVSP
:
554 case FS_IOC_UNRESVSP64
:
555 return ioctl_preallocate(filp
, FALLOC_FL_PUNCH_HOLE
, p
);
556 case FS_IOC_ZERO_RANGE
:
557 return ioctl_preallocate(filp
, FALLOC_FL_ZERO_RANGE
, p
);
563 static int ioctl_fionbio(struct file
*filp
, int __user
*argp
)
568 error
= get_user(on
, argp
);
573 /* SunOS compatibility item. */
574 if (O_NONBLOCK
!= O_NDELAY
)
577 spin_lock(&filp
->f_lock
);
579 filp
->f_flags
|= flag
;
581 filp
->f_flags
&= ~flag
;
582 spin_unlock(&filp
->f_lock
);
586 static int ioctl_fioasync(unsigned int fd
, struct file
*filp
,
592 error
= get_user(on
, argp
);
595 flag
= on
? FASYNC
: 0;
597 /* Did the FASYNC state change? */
598 if ((flag
^ filp
->f_flags
) & FASYNC
) {
599 if (filp
->f_op
->fasync
)
600 /* fasync() adjusts filp->f_flags */
601 error
= filp
->f_op
->fasync(fd
, filp
, on
);
605 return error
< 0 ? error
: 0;
608 static int ioctl_fsfreeze(struct file
*filp
)
610 struct super_block
*sb
= file_inode(filp
)->i_sb
;
612 if (!ns_capable(sb
->s_user_ns
, CAP_SYS_ADMIN
))
615 /* If the filesystem provides neither ->freeze_fs nor ->freeze_super, freezing is unsupported; return. */
616 if (sb
->s_op
->freeze_fs
== NULL
&& sb
->s_op
->freeze_super
== NULL
)
620 if (sb
->s_op
->freeze_super
)
621 return sb
->s_op
->freeze_super(sb
);
622 return freeze_super(sb
);
625 static int ioctl_fsthaw(struct file
*filp
)
627 struct super_block
*sb
= file_inode(filp
)->i_sb
;
629 if (!ns_capable(sb
->s_user_ns
, CAP_SYS_ADMIN
))
633 if (sb
->s_op
->thaw_super
)
634 return sb
->s_op
->thaw_super(sb
);
635 return thaw_super(sb
);
638 static int ioctl_file_dedupe_range(struct file
*file
,
639 struct file_dedupe_range __user
*argp
)
641 struct file_dedupe_range
*same
= NULL
;
646 if (get_user(count
, &argp
->dest_count
)) {
651 size
= offsetof(struct file_dedupe_range __user
, info
[count
]);
652 if (size
> PAGE_SIZE
) {
657 same
= memdup_user(argp
, size
);
664 same
->dest_count
= count
;
665 ret
= vfs_dedupe_file_range(file
, same
);
669 ret
= copy_to_user(argp
, same
, size
);
679 * do_vfs_ioctl() is not for drivers and not intended to be EXPORT_SYMBOL()'d.
680 * It's just a simple helper for sys_ioctl and compat_sys_ioctl.
682 * When you add any new common ioctls to the switches above and below,
683 * please ensure they have compatible arguments in compat mode.
685 static int do_vfs_ioctl(struct file
*filp
, unsigned int fd
,
686 unsigned int cmd
, unsigned long arg
)
688 void __user
*argp
= (void __user
*)arg
;
689 struct inode
*inode
= file_inode(filp
);
693 set_close_on_exec(fd
, 1);
697 set_close_on_exec(fd
, 0);
701 return ioctl_fionbio(filp
, argp
);
704 return ioctl_fioasync(fd
, filp
, argp
);
707 if (S_ISDIR(inode
->i_mode
) || S_ISREG(inode
->i_mode
) ||
708 S_ISLNK(inode
->i_mode
)) {
709 loff_t res
= inode_get_bytes(inode
);
710 return copy_to_user(argp
, &res
, sizeof(res
)) ?
717 return ioctl_fsfreeze(filp
);
720 return ioctl_fsthaw(filp
);
723 return ioctl_fiemap(filp
, argp
);
726 /* anon_bdev filesystems may not have a block size */
727 if (!inode
->i_sb
->s_blocksize
)
730 return put_user(inode
->i_sb
->s_blocksize
, (int __user
*)argp
);
733 return ioctl_file_clone(filp
, arg
, 0, 0, 0);
736 return ioctl_file_clone_range(filp
, argp
);
739 return ioctl_file_dedupe_range(filp
, argp
);
742 if (!S_ISREG(inode
->i_mode
))
743 return vfs_ioctl(filp
, cmd
, arg
);
745 return put_user(i_size_read(inode
) - filp
->f_pos
,
749 if (S_ISREG(inode
->i_mode
))
750 return file_ioctl(filp
, cmd
, argp
);
757 int ksys_ioctl(unsigned int fd
, unsigned int cmd
, unsigned long arg
)
759 struct fd f
= fdget(fd
);
765 error
= security_file_ioctl(f
.file
, cmd
, arg
);
769 error
= do_vfs_ioctl(f
.file
, fd
, cmd
, arg
);
770 if (error
== -ENOIOCTLCMD
)
771 error
= vfs_ioctl(f
.file
, cmd
, arg
);
778 SYSCALL_DEFINE3(ioctl
, unsigned int, fd
, unsigned int, cmd
, unsigned long, arg
)
780 return ksys_ioctl(fd
, cmd
, arg
);
785 * compat_ptr_ioctl - generic implementation of .compat_ioctl file operation
787 * This is not normally called as a function, but instead set in struct
790 * .compat_ioctl = compat_ptr_ioctl,
792 * On most architectures, the compat_ptr_ioctl() just passes all arguments
793 * to the corresponding ->ioctl handler. The exception is arch/s390, where
794 * compat_ptr() clears the top bit of a 32-bit pointer value, so user space
795 * pointers to the second 2GB alias the first 2GB, as is the case for
796 * native 32-bit s390 user space.
798 * The compat_ptr_ioctl() function must therefore be used only with ioctl
799 * functions that either ignore the argument or pass a pointer to a
800 * compatible data type.
802 * If any ioctl command handled by fops->unlocked_ioctl passes a plain
803 * integer instead of a pointer, or any of the passed data types
804 * is incompatible between 32-bit and 64-bit architectures, a proper
805 * handler is required instead of compat_ptr_ioctl.
807 long compat_ptr_ioctl(struct file
*file
, unsigned int cmd
, unsigned long arg
)
809 if (!file
->f_op
->unlocked_ioctl
)
812 return file
->f_op
->unlocked_ioctl(file
, cmd
, (unsigned long)compat_ptr(arg
));
814 EXPORT_SYMBOL(compat_ptr_ioctl
);
816 COMPAT_SYSCALL_DEFINE3(ioctl
, unsigned int, fd
, unsigned int, cmd
,
819 struct fd f
= fdget(fd
);
825 /* RED-PEN how should LSM module know it's handling 32bit? */
826 error
= security_file_ioctl(f
.file
, cmd
, arg
);
831 /* FICLONE takes an int argument, so don't use compat_ptr() */
833 error
= ioctl_file_clone(f
.file
, arg
, 0, 0, 0);
836 #if defined(CONFIG_X86_64)
837 /* these get messy on amd64 due to alignment differences */
838 case FS_IOC_RESVSP_32
:
839 case FS_IOC_RESVSP64_32
:
840 error
= compat_ioctl_preallocate(f
.file
, 0, compat_ptr(arg
));
842 case FS_IOC_UNRESVSP_32
:
843 case FS_IOC_UNRESVSP64_32
:
844 error
= compat_ioctl_preallocate(f
.file
, FALLOC_FL_PUNCH_HOLE
,
847 case FS_IOC_ZERO_RANGE_32
:
848 error
= compat_ioctl_preallocate(f
.file
, FALLOC_FL_ZERO_RANGE
,
854 * everything else in do_vfs_ioctl() takes either a compatible
855 * pointer argument or no argument -- call it with a modified
859 error
= do_vfs_ioctl(f
.file
, fd
, cmd
,
860 (unsigned long)compat_ptr(arg
));
861 if (error
!= -ENOIOCTLCMD
)
864 if (f
.file
->f_op
->compat_ioctl
)
865 error
= f
.file
->f_op
->compat_ioctl(f
.file
, cmd
, arg
);
866 if (error
== -ENOIOCTLCMD
)