]> git.proxmox.com Git - mirror_zfs.git/blame - module/os/linux/zfs/zpl_file_range.c
copy_file_range: fix fallback when source create on same txg
[mirror_zfs.git] / module / os / linux / zfs / zpl_file_range.c
CommitLineData
5d12545d
RN
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2023, Klara Inc.
23 */
24
25#ifdef CONFIG_COMPAT
26#include <linux/compat.h>
27#endif
28#include <linux/fs.h>
29#include <sys/file.h>
30#include <sys/zfs_znode.h>
31#include <sys/zfs_vnops.h>
32#include <sys/zfeature.h>
33
34/*
35 * Clone part of a file via block cloning.
36 *
37 * Note that we are not required to update file offsets; the kernel will take
38 * care of that depending on how it was called.
39 */
40static ssize_t
41__zpl_clone_file_range(struct file *src_file, loff_t src_off,
42 struct file *dst_file, loff_t dst_off, size_t len)
43{
44 struct inode *src_i = file_inode(src_file);
45 struct inode *dst_i = file_inode(dst_file);
46 uint64_t src_off_o = (uint64_t)src_off;
47 uint64_t dst_off_o = (uint64_t)dst_off;
48 uint64_t len_o = (uint64_t)len;
49 cred_t *cr = CRED();
50 fstrans_cookie_t cookie;
51 int err;
52
53 if (!spa_feature_is_enabled(
54 dmu_objset_spa(ITOZSB(dst_i)->z_os), SPA_FEATURE_BLOCK_CLONING))
55 return (-EOPNOTSUPP);
56
57 if (src_i != dst_i)
58 spl_inode_lock_shared(src_i);
59 spl_inode_lock(dst_i);
60
61 crhold(cr);
62 cookie = spl_fstrans_mark();
63
64 err = -zfs_clone_range(ITOZ(src_i), &src_off_o, ITOZ(dst_i),
65 &dst_off_o, &len_o, cr);
66
67 spl_fstrans_unmark(cookie);
68 crfree(cr);
69
70 spl_inode_unlock(dst_i);
71 if (src_i != dst_i)
72 spl_inode_unlock_shared(src_i);
73
74 if (err < 0)
75 return (err);
76
77 return ((ssize_t)len_o);
78}
79
2768dc04
RN
80#if defined(HAVE_VFS_COPY_FILE_RANGE) || \
81 defined(HAVE_VFS_FILE_OPERATIONS_EXTEND)
5d12545d
RN
82/*
83 * Entry point for copy_file_range(). Copy len bytes from src_off in src_file
84 * to dst_off in dst_file. We are permitted to do this however we like, so we
85 * try to just clone the blocks, and if we can't support it, fall back to the
86 * kernel's generic byte copy function.
87 */
88ssize_t
89zpl_copy_file_range(struct file *src_file, loff_t src_off,
90 struct file *dst_file, loff_t dst_off, size_t len, unsigned int flags)
91{
92 ssize_t ret;
93
94 if (flags != 0)
95 return (-EINVAL);
96
97 /* Try to do it via zfs_clone_range() */
2768dc04 98 ret = __zpl_clone_file_range(src_file, src_off,
5d12545d
RN
99 dst_file, dst_off, len);
100
101#ifdef HAVE_VFS_GENERIC_COPY_FILE_RANGE
102 /*
103 * Since Linux 5.3 the filesystem driver is responsible for executing
104 * an appropriate fallback, and a generic fallback function is provided.
105 */
92f095a9
RN
106 if (ret == -EOPNOTSUPP || ret == -EINVAL || ret == -EXDEV ||
107 ret == -EAGAIN)
5d12545d
RN
108 ret = generic_copy_file_range(src_file, src_off, dst_file,
109 dst_off, len, flags);
c47f0f44
RN
110#else
111 /*
112 * Before Linux 5.3 the filesystem has to return -EOPNOTSUPP to signal
113 * to the kernel that it should fallback to a content copy.
114 */
92f095a9 115 if (ret == -EINVAL || ret == -EXDEV || ret == -EAGAIN)
c47f0f44 116 ret = -EOPNOTSUPP;
5d12545d
RN
117#endif /* HAVE_VFS_GENERIC_COPY_FILE_RANGE */
118
119 return (ret);
120}
2768dc04 121#endif /* HAVE_VFS_COPY_FILE_RANGE || HAVE_VFS_FILE_OPERATIONS_EXTEND */
5d12545d
RN
122
123#ifdef HAVE_VFS_REMAP_FILE_RANGE
124/*
125 * Entry point for FICLONE/FICLONERANGE/FIDEDUPERANGE.
126 *
127 * FICLONE and FICLONERANGE are basically the same as copy_file_range(), except
128 * that they must clone - they cannot fall back to copying. FICLONE is exactly
129 * FICLONERANGE, for the entire file. We don't need to try to tell them apart;
130 * the kernel will sort that out for us.
131 *
132 * FIDEDUPERANGE is for turning a non-clone into a clone, that is, compare the
133 * range in both files and if they're the same, arrange for them to be backed
134 * by the same storage.
135 */
136loff_t
137zpl_remap_file_range(struct file *src_file, loff_t src_off,
138 struct file *dst_file, loff_t dst_off, loff_t len, unsigned int flags)
139{
140 if (flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_CAN_SHORTEN))
141 return (-EINVAL);
142
143 /*
144 * REMAP_FILE_CAN_SHORTEN lets us know we can clone less than the given
145 * range if we want. Its designed for filesystems that make data past
146 * EOF available, and don't want it to be visible in both files. ZFS
147 * doesn't do that, so we just turn the flag off.
148 */
149 flags &= ~REMAP_FILE_CAN_SHORTEN;
150
151 if (flags & REMAP_FILE_DEDUP)
152 /* No support for dedup yet */
153 return (-EOPNOTSUPP);
154
155 /* Zero length means to clone everything to the end of the file */
156 if (len == 0)
157 len = i_size_read(file_inode(src_file)) - src_off;
158
159 return (__zpl_clone_file_range(src_file, src_off,
160 dst_file, dst_off, len));
161}
162#endif /* HAVE_VFS_REMAP_FILE_RANGE */
163
2768dc04
RN
164#if defined(HAVE_VFS_CLONE_FILE_RANGE) || \
165 defined(HAVE_VFS_FILE_OPERATIONS_EXTEND)
5d12545d
RN
166/*
167 * Entry point for FICLONE and FICLONERANGE, before Linux 4.20.
168 */
169int
170zpl_clone_file_range(struct file *src_file, loff_t src_off,
171 struct file *dst_file, loff_t dst_off, uint64_t len)
172{
173 /* Zero length means to clone everything to the end of the file */
174 if (len == 0)
175 len = i_size_read(file_inode(src_file)) - src_off;
176
177 return (__zpl_clone_file_range(src_file, src_off,
178 dst_file, dst_off, len));
179}
2768dc04 180#endif /* HAVE_VFS_CLONE_FILE_RANGE || HAVE_VFS_FILE_OPERATIONS_EXTEND */
5d12545d
RN
181
182#ifdef HAVE_VFS_DEDUPE_FILE_RANGE
183/*
184 * Entry point for FIDEDUPERANGE, before Linux 4.20.
185 */
186int
187zpl_dedupe_file_range(struct file *src_file, loff_t src_off,
188 struct file *dst_file, loff_t dst_off, uint64_t len)
189{
190 /* No support for dedup yet */
191 return (-EOPNOTSUPP);
192}
193#endif /* HAVE_VFS_DEDUPE_FILE_RANGE */
3366ceaf
RN
194
195/* Entry point for FICLONE, before Linux 4.5. */
196long
197zpl_ioctl_ficlone(struct file *dst_file, void *arg)
198{
199 unsigned long sfd = (unsigned long)arg;
200
201 struct file *src_file = fget(sfd);
202 if (src_file == NULL)
203 return (-EBADF);
204
205 if (dst_file->f_op != src_file->f_op)
206 return (-EXDEV);
207
208 size_t len = i_size_read(file_inode(src_file));
209
210 ssize_t ret =
211 __zpl_clone_file_range(src_file, 0, dst_file, 0, len);
212
213 fput(src_file);
214
215 if (ret < 0) {
216 if (ret == -EOPNOTSUPP)
217 return (-ENOTTY);
218 return (ret);
219 }
220
221 if (ret != len)
222 return (-EINVAL);
223
224 return (0);
225}
226
227/* Entry point for FICLONERANGE, before Linux 4.5. */
228long
229zpl_ioctl_ficlonerange(struct file *dst_file, void __user *arg)
230{
231 zfs_ioc_compat_file_clone_range_t fcr;
232
233 if (copy_from_user(&fcr, arg, sizeof (fcr)))
234 return (-EFAULT);
235
236 struct file *src_file = fget(fcr.fcr_src_fd);
237 if (src_file == NULL)
238 return (-EBADF);
239
240 if (dst_file->f_op != src_file->f_op)
241 return (-EXDEV);
242
243 size_t len = fcr.fcr_src_length;
244 if (len == 0)
245 len = i_size_read(file_inode(src_file)) - fcr.fcr_src_offset;
246
247 ssize_t ret = __zpl_clone_file_range(src_file, fcr.fcr_src_offset,
248 dst_file, fcr.fcr_dest_offset, len);
249
250 fput(src_file);
251
252 if (ret < 0) {
253 if (ret == -EOPNOTSUPP)
254 return (-ENOTTY);
255 return (ret);
256 }
257
258 if (ret != len)
259 return (-EINVAL);
260
261 return (0);
262}
263
264/* Entry point for FIDEDUPERANGE, before Linux 4.5. */
265long
266zpl_ioctl_fideduperange(struct file *filp, void *arg)
267{
268 (void) arg;
269
270 /* No support for dedup yet */
271 return (-ENOTTY);
272}