]> git.proxmox.com Git - mirror_zfs.git/blob - module/os/linux/zfs/zpl_file_range.c
73476ff40ebf1c360cef72ec15ad555cc220f72f
[mirror_zfs.git] / module / os / linux / zfs / zpl_file_range.c
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2023, Klara Inc.
23 */
24
25 #ifdef CONFIG_COMPAT
26 #include <linux/compat.h>
27 #endif
28 #include <linux/fs.h>
29 #include <sys/file.h>
30 #include <sys/zfs_znode.h>
31 #include <sys/zfs_vnops.h>
32 #include <sys/zfeature.h>
33
/*
 * Block cloning switch.  While this is zero, __zpl_clone_file_range()
 * rejects every request with -EOPNOTSUPP before doing any work.
 * Defaults to enabled (1).
 */
int zfs_bclone_enabled = 1;
35
36 /*
37 * Clone part of a file via block cloning.
38 *
39 * Note that we are not required to update file offsets; the kernel will take
40 * care of that depending on how it was called.
41 */
42 static ssize_t
43 __zpl_clone_file_range(struct file *src_file, loff_t src_off,
44 struct file *dst_file, loff_t dst_off, size_t len)
45 {
46 struct inode *src_i = file_inode(src_file);
47 struct inode *dst_i = file_inode(dst_file);
48 uint64_t src_off_o = (uint64_t)src_off;
49 uint64_t dst_off_o = (uint64_t)dst_off;
50 uint64_t len_o = (uint64_t)len;
51 cred_t *cr = CRED();
52 fstrans_cookie_t cookie;
53 int err;
54
55 if (!zfs_bclone_enabled)
56 return (-EOPNOTSUPP);
57
58 if (!spa_feature_is_enabled(
59 dmu_objset_spa(ITOZSB(dst_i)->z_os), SPA_FEATURE_BLOCK_CLONING))
60 return (-EOPNOTSUPP);
61
62 if (src_i != dst_i)
63 spl_inode_lock_shared(src_i);
64 spl_inode_lock(dst_i);
65
66 crhold(cr);
67 cookie = spl_fstrans_mark();
68
69 err = -zfs_clone_range(ITOZ(src_i), &src_off_o, ITOZ(dst_i),
70 &dst_off_o, &len_o, cr);
71
72 spl_fstrans_unmark(cookie);
73 crfree(cr);
74
75 spl_inode_unlock(dst_i);
76 if (src_i != dst_i)
77 spl_inode_unlock_shared(src_i);
78
79 if (err < 0)
80 return (err);
81
82 return ((ssize_t)len_o);
83 }
84
#if defined(HAVE_VFS_COPY_FILE_RANGE) || \
    defined(HAVE_VFS_FILE_OPERATIONS_EXTEND)
/*
 * copy_file_range() entry point: copy len bytes from src_off in src_file
 * to dst_off in dst_file.  How the bytes get there is up to us, so block
 * cloning is attempted first; when cloning is not possible the request is
 * routed to the kernel's generic byte copy instead.
 *
 * Any non-zero flags value is rejected with -EINVAL.
 */
ssize_t
zpl_copy_file_range(struct file *src_file, loff_t src_off,
    struct file *dst_file, loff_t dst_off, size_t len, unsigned int flags)
{
	ssize_t copied;

	if (flags != 0)
		return (-EINVAL);

	/* First choice: clone the blocks via zfs_clone_range(). */
	copied = __zpl_clone_file_range(src_file, src_off,
	    dst_file, dst_off, len);

#ifdef HAVE_VFS_GENERIC_COPY_FILE_RANGE
	/*
	 * From Linux 5.3 onwards the filesystem itself has to run the
	 * fallback; the kernel supplies a generic helper for that.
	 */
	switch (copied) {
	case -EOPNOTSUPP:
	case -EINVAL:
	case -EXDEV:
	case -EAGAIN:
		copied = generic_copy_file_range(src_file, src_off, dst_file,
		    dst_off, len, flags);
		break;
	default:
		break;
	}
#else
	/*
	 * Older kernels fall back to a content copy on their own, but only
	 * when the filesystem answers with -EOPNOTSUPP.
	 */
	switch (copied) {
	case -EINVAL:
	case -EXDEV:
	case -EAGAIN:
		copied = -EOPNOTSUPP;
		break;
	default:
		break;
	}
#endif /* HAVE_VFS_GENERIC_COPY_FILE_RANGE */

	return (copied);
}
#endif /* HAVE_VFS_COPY_FILE_RANGE || HAVE_VFS_FILE_OPERATIONS_EXTEND */
127
#ifdef HAVE_VFS_REMAP_FILE_RANGE
/*
 * Entry point for FICLONE/FICLONERANGE/FIDEDUPERANGE.
 *
 * FICLONE and FICLONERANGE are basically the same as copy_file_range(), except
 * that they must clone - they cannot fall back to copying. FICLONE is exactly
 * FICLONERANGE, for the entire file. We don't need to try to tell them apart;
 * the kernel will sort that out for us.
 *
 * FIDEDUPERANGE is for turning a non-clone into a clone, that is, compare the
 * range in both files and if they're the same, arrange for them to be backed
 * by the same storage.
 *
 * Returns the number of bytes cloned, or a negative errno:
 *   -EINVAL      unknown flag bits, or fewer bytes than requested were
 *                cloned and the caller did not pass REMAP_FILE_CAN_SHORTEN
 *   -EOPNOTSUPP  REMAP_FILE_DEDUP was requested (not implemented), or block
 *                cloning is unavailable
 *   other        whatever __zpl_clone_file_range() reported
 */
loff_t
zpl_remap_file_range(struct file *src_file, loff_t src_off,
    struct file *dst_file, loff_t dst_off, loff_t len, unsigned int flags)
{
	ssize_t ret;

	if (flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_CAN_SHORTEN))
		return (-EINVAL);

	/* No support for dedup yet */
	if (flags & REMAP_FILE_DEDUP)
		return (-EOPNOTSUPP);

	/* Zero length means to clone everything to the end of the file */
	if (len == 0)
		len = i_size_read(file_inode(src_file)) - src_off;

	ret = __zpl_clone_file_range(src_file, src_off,
	    dst_file, dst_off, len);

	/*
	 * REMAP_FILE_CAN_SHORTEN is the caller's permission to clone less
	 * than the requested range. Without it a short clone must not be
	 * reported as success: for a whole-file request (len == 0 on entry)
	 * the kernel has no length of its own to compare against, so it
	 * would accept a partial clone silently.
	 */
	if (ret >= 0 && ret != len && !(flags & REMAP_FILE_CAN_SHORTEN))
		ret = -EINVAL;

	return (ret);
}
#endif /* HAVE_VFS_REMAP_FILE_RANGE */
168
#if defined(HAVE_VFS_CLONE_FILE_RANGE) || \
    defined(HAVE_VFS_FILE_OPERATIONS_EXTEND)
/*
 * Entry point for FICLONE and FICLONERANGE, before Linux 4.20.
 *
 * This hook returns an int status rather than a byte count: 0 when the
 * whole requested range was cloned, otherwise a negative errno. The byte
 * count from __zpl_clone_file_range() is therefore never returned
 * directly; narrowed to int it would leak a positive value to the ioctl
 * caller and could even turn negative for clones of 2 GiB or more.
 *
 * Returns 0 on success, -EINVAL if fewer bytes than requested were cloned,
 * or the negative errno reported by __zpl_clone_file_range().
 */
int
zpl_clone_file_range(struct file *src_file, loff_t src_off,
    struct file *dst_file, loff_t dst_off, uint64_t len)
{
	ssize_t ret;

	/* Zero length means to clone everything to the end of the file */
	if (len == 0)
		len = i_size_read(file_inode(src_file)) - src_off;

	ret = __zpl_clone_file_range(src_file, src_off,
	    dst_file, dst_off, len);
	if (ret < 0)
		return ((int)ret);

	/* The entire length must be cloned or this is an error. */
	if ((uint64_t)ret != len)
		return (-EINVAL);

	return (0);
}
#endif /* HAVE_VFS_CLONE_FILE_RANGE || HAVE_VFS_FILE_OPERATIONS_EXTEND */
186
#ifdef HAVE_VFS_DEDUPE_FILE_RANGE
/*
 * FIDEDUPERANGE entry point for kernels older than Linux 4.20.
 *
 * Dedup is not implemented, so every request is answered with
 * -EOPNOTSUPP and none of the arguments are examined.
 */
int
zpl_dedupe_file_range(struct file *src_file, loff_t src_off,
    struct file *dst_file, loff_t dst_off, uint64_t len)
{
	(void) src_file;
	(void) src_off;
	(void) dst_file;
	(void) dst_off;
	(void) len;

	/* No support for dedup yet */
	return (-EOPNOTSUPP);
}
#endif /* HAVE_VFS_DEDUPE_FILE_RANGE */
199
200 /* Entry point for FICLONE, before Linux 4.5. */
201 long
202 zpl_ioctl_ficlone(struct file *dst_file, void *arg)
203 {
204 unsigned long sfd = (unsigned long)arg;
205
206 struct file *src_file = fget(sfd);
207 if (src_file == NULL)
208 return (-EBADF);
209
210 if (dst_file->f_op != src_file->f_op) {
211 fput(src_file);
212 return (-EXDEV);
213 }
214
215 size_t len = i_size_read(file_inode(src_file));
216
217 ssize_t ret =
218 __zpl_clone_file_range(src_file, 0, dst_file, 0, len);
219
220 fput(src_file);
221
222 if (ret < 0) {
223 if (ret == -EOPNOTSUPP)
224 return (-ENOTTY);
225 return (ret);
226 }
227
228 if (ret != len)
229 return (-EINVAL);
230
231 return (0);
232 }
233
234 /* Entry point for FICLONERANGE, before Linux 4.5. */
235 long
236 zpl_ioctl_ficlonerange(struct file *dst_file, void __user *arg)
237 {
238 zfs_ioc_compat_file_clone_range_t fcr;
239
240 if (copy_from_user(&fcr, arg, sizeof (fcr)))
241 return (-EFAULT);
242
243 struct file *src_file = fget(fcr.fcr_src_fd);
244 if (src_file == NULL)
245 return (-EBADF);
246
247 if (dst_file->f_op != src_file->f_op) {
248 fput(src_file);
249 return (-EXDEV);
250 }
251
252 size_t len = fcr.fcr_src_length;
253 if (len == 0)
254 len = i_size_read(file_inode(src_file)) - fcr.fcr_src_offset;
255
256 ssize_t ret = __zpl_clone_file_range(src_file, fcr.fcr_src_offset,
257 dst_file, fcr.fcr_dest_offset, len);
258
259 fput(src_file);
260
261 if (ret < 0) {
262 if (ret == -EOPNOTSUPP)
263 return (-ENOTTY);
264 return (ret);
265 }
266
267 if (ret != len)
268 return (-EINVAL);
269
270 return (0);
271 }
272
/*
 * FIDEDUPERANGE entry point for kernels older than Linux 4.5.
 *
 * Dedup is not implemented; the request is always answered with -ENOTTY
 * and neither argument is examined.
 */
long
zpl_ioctl_fideduperange(struct file *filp, void *arg)
{
	(void) filp;
	(void) arg;

	/* No support for dedup yet */
	return (-ENOTTY);
}