4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 2023, Klara Inc.
26 #include <linux/compat.h>
29 #ifdef HAVE_VFS_SPLICE_COPY_FILE_RANGE
30 #include <linux/splice.h>
33 #include <sys/zfs_znode.h>
34 #include <sys/zfs_vnops.h>
35 #include <sys/zfeature.h>
38 * Clone part of a file via block cloning.
40 * Note that we are not required to update file offsets; the kernel will take
41 * care of that depending on how it was called.
44 zpl_clone_file_range_impl(struct file
*src_file
, loff_t src_off
,
45 struct file
*dst_file
, loff_t dst_off
, size_t len
)
47 struct inode
*src_i
= file_inode(src_file
);
48 struct inode
*dst_i
= file_inode(dst_file
);
49 uint64_t src_off_o
= (uint64_t)src_off
;
50 uint64_t dst_off_o
= (uint64_t)dst_off
;
51 uint64_t len_o
= (uint64_t)len
;
53 fstrans_cookie_t cookie
;
56 if (!zfs_bclone_enabled
)
59 if (!spa_feature_is_enabled(
60 dmu_objset_spa(ITOZSB(dst_i
)->z_os
), SPA_FEATURE_BLOCK_CLONING
))
64 spl_inode_lock_shared(src_i
);
65 spl_inode_lock(dst_i
);
68 cookie
= spl_fstrans_mark();
70 err
= -zfs_clone_range(ITOZ(src_i
), &src_off_o
, ITOZ(dst_i
),
71 &dst_off_o
, &len_o
, cr
);
73 spl_fstrans_unmark(cookie
);
76 spl_inode_unlock(dst_i
);
78 spl_inode_unlock_shared(src_i
);
83 return ((ssize_t
)len_o
);
#if defined(HAVE_VFS_COPY_FILE_RANGE) || \
    defined(HAVE_VFS_FILE_OPERATIONS_EXTEND)
/*
 * Entry point for copy_file_range(). Copy len bytes from src_off in src_file
 * to dst_off in dst_file. We are permitted to do this however we like, so we
 * try to just clone the blocks, and if we can't support it, fall back to the
 * kernel's generic byte copy function.
 *
 * Returns the number of bytes cloned or copied (possibly fewer than len), or
 * a negative errno.
 *
 * NOTE(review): the return type, the flags check body, the trailing
 * -EAGAIN term of the two fallback conditions, the tail of the
 * splice_copy_file_range() call and the body of the pre-5.3 branch were not
 * legible in the reviewed copy; they are restored from the surrounding
 * comments and the visible pre-5.3 condition - confirm against the tree.
 */
ssize_t
zpl_copy_file_range(struct file *src_file, loff_t src_off,
    struct file *dst_file, loff_t dst_off, size_t len, unsigned int flags)
{
	ssize_t ret;

	/* Flags is reserved for future extensions and must be zero. */
	if (flags != 0)
		return (-EINVAL);

	/* Try to do it via zfs_clone_range() and allow shortening. */
	ret = zpl_clone_file_range_impl(src_file, src_off,
	    dst_file, dst_off, len);

#if defined(HAVE_VFS_GENERIC_COPY_FILE_RANGE)
	/*
	 * Since Linux 5.3 the filesystem driver is responsible for executing
	 * an appropriate fallback, and a generic fallback function is provided.
	 */
	if (ret == -EOPNOTSUPP || ret == -EINVAL || ret == -EXDEV ||
	    ret == -EAGAIN)
		ret = generic_copy_file_range(src_file, src_off, dst_file,
		    dst_off, len, flags);
#elif defined(HAVE_VFS_SPLICE_COPY_FILE_RANGE)
	/*
	 * Since 6.8 the fallback function is called splice_copy_file_range
	 * and has a slightly different signature.
	 */
	if (ret == -EOPNOTSUPP || ret == -EINVAL || ret == -EXDEV ||
	    ret == -EAGAIN)
		ret = splice_copy_file_range(src_file, src_off, dst_file,
		    dst_off, len);
#else
	/*
	 * Before Linux 5.3 the filesystem has to return -EOPNOTSUPP to signal
	 * to the kernel that it should fallback to a content copy.
	 */
	if (ret == -EINVAL || ret == -EXDEV || ret == -EAGAIN)
		ret = -EOPNOTSUPP;
#endif /* HAVE_VFS_GENERIC_COPY_FILE_RANGE || HAVE_VFS_SPLICE_COPY_FILE_RANGE */

	return (ret);
}
#endif /* HAVE_VFS_COPY_FILE_RANGE || HAVE_VFS_FILE_OPERATIONS_EXTEND */
#ifdef HAVE_VFS_REMAP_FILE_RANGE
/*
 * Entry point for FICLONE/FICLONERANGE/FIDEDUPERANGE.
 *
 * FICLONE and FICLONERANGE are basically the same as copy_file_range(), except
 * that they must clone - they cannot fall back to copying. FICLONE is exactly
 * FICLONERANGE, for the entire file. We don't need to try to tell them apart;
 * the kernel will sort that out for us.
 *
 * FIDEDUPERANGE is for turning a non-clone into a clone, that is, compare the
 * range in both files and if they're the same, arrange for them to be backed
 * by the same storage.
 *
 * REMAP_FILE_CAN_SHORTEN lets us know we can clone less than the given range
 * if we want. It's designed for filesystems that may need to shorten the
 * length for alignment, EOF, or any other requirement. ZFS may shorten the
 * request when there is outstanding dirty data which hasn't been written.
 *
 * Returns the number of bytes cloned, or a negative errno.
 *
 * NOTE(review): the return type, the -EINVAL results for unknown flags and
 * for a disallowed short clone, and the len == 0 test were not legible in
 * the reviewed copy and are restored from the surrounding comments - confirm
 * against the tree.
 */
loff_t
zpl_remap_file_range(struct file *src_file, loff_t src_off,
    struct file *dst_file, loff_t dst_off, loff_t len, unsigned int flags)
{
	/* Reject any flag we do not know how to honour. */
	if (flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_CAN_SHORTEN))
		return (-EINVAL);

	/* No support for dedup yet */
	if (flags & REMAP_FILE_DEDUP)
		return (-EOPNOTSUPP);

	/* Zero length means to clone everything to the end of the file */
	if (len == 0)
		len = i_size_read(file_inode(src_file)) - src_off;

	ssize_t ret = zpl_clone_file_range_impl(src_file, src_off,
	    dst_file, dst_off, len);

	/* A short clone is only acceptable if the caller allowed it. */
	if (!(flags & REMAP_FILE_CAN_SHORTEN) && ret >= 0 && ret != len)
		ret = -EINVAL;

	return (ret);
}
#endif /* HAVE_VFS_REMAP_FILE_RANGE */
#if defined(HAVE_VFS_CLONE_FILE_RANGE) || \
    defined(HAVE_VFS_FILE_OPERATIONS_EXTEND)
/*
 * Entry point for FICLONE and FICLONERANGE, before Linux 4.20.
 *
 * Clones len bytes (or, when len is zero, everything from src_off to the end
 * of the source file) via zpl_clone_file_range_impl(). A partial clone is
 * treated as an error.
 *
 * NOTE(review): the return type, the len == 0 test, the -EINVAL result for
 * a short clone and the final return were not legible in the reviewed copy
 * and are restored from the surrounding comments - confirm against the tree.
 */
int
zpl_clone_file_range(struct file *src_file, loff_t src_off,
    struct file *dst_file, loff_t dst_off, uint64_t len)
{
	/* Zero length means to clone everything to the end of the file */
	if (len == 0)
		len = i_size_read(file_inode(src_file)) - src_off;

	/* The entire length must be cloned or this is an error. */
	ssize_t ret = zpl_clone_file_range_impl(src_file, src_off,
	    dst_file, dst_off, len);

	if (ret >= 0 && ret != len)
		ret = -EINVAL;

	return (ret);
}
#endif /* HAVE_VFS_CLONE_FILE_RANGE || HAVE_VFS_FILE_OPERATIONS_EXTEND */
#ifdef HAVE_VFS_DEDUPE_FILE_RANGE
/*
 * Entry point for FIDEDUPERANGE, before Linux 4.20.
 *
 * Dedup is not implemented, so every request is refused with -EOPNOTSUPP
 * and none of the arguments are examined.
 *
 * NOTE(review): the return type was not legible in the reviewed copy; int
 * is assumed - confirm against the prototype in the tree.
 */
int
zpl_dedupe_file_range(struct file *src_file, loff_t src_off,
    struct file *dst_file, loff_t dst_off, uint64_t len)
{
	/* No support for dedup yet */
	return (-EOPNOTSUPP);
}
#endif /* HAVE_VFS_DEDUPE_FILE_RANGE */
219 /* Entry point for FICLONE, before Linux 4.5. */
221 zpl_ioctl_ficlone(struct file
*dst_file
, void *arg
)
223 unsigned long sfd
= (unsigned long)arg
;
225 struct file
*src_file
= fget(sfd
);
226 if (src_file
== NULL
)
229 if (dst_file
->f_op
!= src_file
->f_op
) {
234 size_t len
= i_size_read(file_inode(src_file
));
236 ssize_t ret
= zpl_clone_file_range_impl(src_file
, 0, dst_file
, 0, len
);
241 if (ret
== -EOPNOTSUPP
)
252 /* Entry point for FICLONERANGE, before Linux 4.5. */
254 zpl_ioctl_ficlonerange(struct file
*dst_file
, void __user
*arg
)
256 zfs_ioc_compat_file_clone_range_t fcr
;
258 if (copy_from_user(&fcr
, arg
, sizeof (fcr
)))
261 struct file
*src_file
= fget(fcr
.fcr_src_fd
);
262 if (src_file
== NULL
)
265 if (dst_file
->f_op
!= src_file
->f_op
) {
270 size_t len
= fcr
.fcr_src_length
;
272 len
= i_size_read(file_inode(src_file
)) - fcr
.fcr_src_offset
;
274 ssize_t ret
= zpl_clone_file_range_impl(src_file
, fcr
.fcr_src_offset
,
275 dst_file
, fcr
.fcr_dest_offset
, len
);
280 if (ret
== -EOPNOTSUPP
)
291 /* Entry point for FIDEDUPERANGE, before Linux 4.5. */
293 zpl_ioctl_fideduperange(struct file
*filp
, void *arg
)
297 /* No support for dedup yet */