]>
Commit | Line | Data |
---|---|---|
5a35c68b RN |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 | * or https://opensource.org/licenses/CDDL-1.0. | |
10 | * See the License for the specific language governing permissions | |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | /* | |
22 | * Copyright (c) 2023, Klara Inc. | |
23 | */ | |
24 | ||
25 | #ifdef CONFIG_COMPAT | |
26 | #include <linux/compat.h> | |
27 | #endif | |
28 | #include <linux/fs.h> | |
ef08a4d4 RN |
29 | #ifdef HAVE_VFS_SPLICE_COPY_FILE_RANGE |
30 | #include <linux/splice.h> | |
31 | #endif | |
5a35c68b RN |
32 | #include <sys/file.h> |
33 | #include <sys/zfs_znode.h> | |
34 | #include <sys/zfs_vnops.h> | |
35 | #include <sys/zfeature.h> | |
36 | ||
37 | /* | |
38 | * Clone part of a file via block cloning. | |
39 | * | |
40 | * Note that we are not required to update file offsets; the kernel will take | |
41 | * care of that depending on how it was called. | |
42 | */ | |
43 | static ssize_t | |
6dccdf50 | 44 | zpl_clone_file_range_impl(struct file *src_file, loff_t src_off, |
5a35c68b RN |
45 | struct file *dst_file, loff_t dst_off, size_t len) |
46 | { | |
47 | struct inode *src_i = file_inode(src_file); | |
48 | struct inode *dst_i = file_inode(dst_file); | |
49 | uint64_t src_off_o = (uint64_t)src_off; | |
50 | uint64_t dst_off_o = (uint64_t)dst_off; | |
51 | uint64_t len_o = (uint64_t)len; | |
52 | cred_t *cr = CRED(); | |
53 | fstrans_cookie_t cookie; | |
54 | int err; | |
55 | ||
03e9caae RE |
56 | if (!zfs_bclone_enabled) |
57 | return (-EOPNOTSUPP); | |
58 | ||
5a35c68b RN |
59 | if (!spa_feature_is_enabled( |
60 | dmu_objset_spa(ITOZSB(dst_i)->z_os), SPA_FEATURE_BLOCK_CLONING)) | |
61 | return (-EOPNOTSUPP); | |
62 | ||
63 | if (src_i != dst_i) | |
64 | spl_inode_lock_shared(src_i); | |
65 | spl_inode_lock(dst_i); | |
66 | ||
67 | crhold(cr); | |
68 | cookie = spl_fstrans_mark(); | |
69 | ||
70 | err = -zfs_clone_range(ITOZ(src_i), &src_off_o, ITOZ(dst_i), | |
71 | &dst_off_o, &len_o, cr); | |
72 | ||
73 | spl_fstrans_unmark(cookie); | |
74 | crfree(cr); | |
75 | ||
76 | spl_inode_unlock(dst_i); | |
77 | if (src_i != dst_i) | |
78 | spl_inode_unlock_shared(src_i); | |
79 | ||
80 | if (err < 0) | |
81 | return (err); | |
82 | ||
83 | return ((ssize_t)len_o); | |
84 | } | |
85 | ||
6b0a4be5 RN |
#if defined(HAVE_VFS_COPY_FILE_RANGE) || \
    defined(HAVE_VFS_FILE_OPERATIONS_EXTEND)
/*
 * Entry point for copy_file_range(): copy len bytes starting at src_off in
 * src_file to dst_off in dst_file.
 *
 * How the copy is carried out is up to us, so block cloning is attempted
 * first. If cloning fails with -EOPNOTSUPP, -EINVAL, -EXDEV or -EAGAIN, the
 * request is handed to the kernel's generic byte copy instead (directly on
 * kernels that export a fallback helper, or by returning -EOPNOTSUPP on
 * older ones). Any other result from the clone attempt is returned as is.
 */
ssize_t
zpl_copy_file_range(struct file *src_file, loff_t src_off,
    struct file *dst_file, loff_t dst_off, size_t len, unsigned int flags)
{
	ssize_t copied;

	/* Flags is reserved for future extensions and must be zero. */
	if (flags != 0)
		return (-EINVAL);

	/* Try to do it via zfs_clone_range() and allow shortening. */
	copied = zpl_clone_file_range_impl(src_file, src_off,
	    dst_file, dst_off, len);

	switch (copied) {
	case -EOPNOTSUPP:
	case -EINVAL:
	case -EXDEV:
	case -EAGAIN:
#if defined(HAVE_VFS_GENERIC_COPY_FILE_RANGE)
		/*
		 * Since Linux 5.3 the filesystem driver is responsible for
		 * executing an appropriate fallback, and a generic fallback
		 * function is provided.
		 */
		copied = generic_copy_file_range(src_file, src_off, dst_file,
		    dst_off, len, flags);
#elif defined(HAVE_VFS_SPLICE_COPY_FILE_RANGE)
		/*
		 * Since 6.8 the fallback function is called
		 * splice_copy_file_range and has a slightly different
		 * signature.
		 */
		copied = splice_copy_file_range(src_file, src_off, dst_file,
		    dst_off, len);
#else
		/*
		 * Before Linux 5.3 the filesystem has to return -EOPNOTSUPP
		 * to signal to the kernel that it should fall back to a
		 * content copy.
		 */
		copied = -EOPNOTSUPP;
#endif
		break;
	default:
		break;
	}

	return (copied);
}
#endif /* HAVE_VFS_COPY_FILE_RANGE || HAVE_VFS_FILE_OPERATIONS_EXTEND */
5a35c68b RN |
138 | |
#ifdef HAVE_VFS_REMAP_FILE_RANGE
/*
 * Entry point for FICLONE/FICLONERANGE/FIDEDUPERANGE.
 *
 * FICLONE and FICLONERANGE behave like copy_file_range() with one
 * difference: they must clone and are not allowed to fall back to copying.
 * FICLONE is simply FICLONERANGE over the whole file, and the kernel sorts
 * out which one was requested, so no distinction is made here.
 *
 * FIDEDUPERANGE asks for a non-clone to be turned into a clone: the two
 * ranges are compared and, if equal, made to share storage. That is not
 * supported yet, so REMAP_FILE_DEDUP is answered with -EOPNOTSUPP.
 *
 * REMAP_FILE_CAN_SHORTEN tells us that cloning less than the requested range
 * is acceptable. It exists for filesystems that need to shorten the length
 * for alignment, EOF, or any other requirement. ZFS may shorten the request
 * when there is outstanding dirty data which hasn't been written. Without
 * the flag, a short clone is reported as -EINVAL.
 *
 * Returns the number of bytes cloned or a negative errno.
 */
loff_t
zpl_remap_file_range(struct file *src_file, loff_t src_off,
    struct file *dst_file, loff_t dst_off, loff_t len, unsigned int flags)
{
	const unsigned int known = REMAP_FILE_DEDUP | REMAP_FILE_CAN_SHORTEN;
	ssize_t cloned;

	/* Reject any flag we do not know about. */
	if ((flags & ~known) != 0)
		return (-EINVAL);

	/* No support for dedup yet */
	if ((flags & REMAP_FILE_DEDUP) != 0)
		return (-EOPNOTSUPP);

	/* Zero length means to clone everything to the end of the file */
	if (len == 0)
		len = i_size_read(file_inode(src_file)) - src_off;

	cloned = zpl_clone_file_range_impl(src_file, src_off,
	    dst_file, dst_off, len);

	/* Errors, permitted short clones and full clones pass through. */
	if (cloned < 0 || (flags & REMAP_FILE_CAN_SHORTEN) != 0 ||
	    cloned == len)
		return (cloned);

	/* A short clone that the caller did not allow. */
	return (-EINVAL);
}
#endif /* HAVE_VFS_REMAP_FILE_RANGE */
181 | ||
6b0a4be5 RN |
#if defined(HAVE_VFS_CLONE_FILE_RANGE) || \
    defined(HAVE_VFS_FILE_OPERATIONS_EXTEND)
/*
 * Entry point for FICLONE and FICLONERANGE, before Linux 4.20.
 *
 * A len of zero requests a clone from src_off to the end of the source
 * file. The whole range must be cloned; a short clone is an error.
 *
 * Returns 0 on success or a negative errno. The byte count from
 * zpl_clone_file_range_impl() is deliberately not returned: this hook
 * returns an int status, and a positive count would be passed back as the
 * ioctl result (and would be truncated for ranges larger than INT_MAX).
 * This matches zpl_ioctl_ficlone() and zpl_ioctl_ficlonerange().
 */
int
zpl_clone_file_range(struct file *src_file, loff_t src_off,
    struct file *dst_file, loff_t dst_off, uint64_t len)
{
	/* Zero length means to clone everything to the end of the file */
	if (len == 0)
		len = i_size_read(file_inode(src_file)) - src_off;

	ssize_t ret = zpl_clone_file_range_impl(src_file, src_off,
	    dst_file, dst_off, len);

	/* Negative errnos fit in an int and are passed through unchanged. */
	if (ret < 0)
		return ((int)ret);

	/* The entire length must be cloned or this is an error. */
	if ((uint64_t)ret != len)
		return (-EINVAL);

	return (0);
}
#endif /* HAVE_VFS_CLONE_FILE_RANGE || HAVE_VFS_FILE_OPERATIONS_EXTEND */
5a35c68b RN |
205 | |
#ifdef HAVE_VFS_DEDUPE_FILE_RANGE
/*
 * Entry point for FIDEDUPERANGE, before Linux 4.20.
 *
 * Deduplication is not implemented, so every request is refused with
 * -EOPNOTSUPP and none of the arguments are examined.
 */
int
zpl_dedupe_file_range(struct file *src_file, loff_t src_off,
    struct file *dst_file, loff_t dst_off, uint64_t len)
{
	(void) src_file;
	(void) src_off;
	(void) dst_file;
	(void) dst_off;
	(void) len;

	/* No support for dedup yet */
	return (-EOPNOTSUPP);
}
#endif /* HAVE_VFS_DEDUPE_FILE_RANGE */
9927f219 RN |
218 | |
219 | /* Entry point for FICLONE, before Linux 4.5. */ | |
220 | long | |
221 | zpl_ioctl_ficlone(struct file *dst_file, void *arg) | |
222 | { | |
223 | unsigned long sfd = (unsigned long)arg; | |
224 | ||
225 | struct file *src_file = fget(sfd); | |
226 | if (src_file == NULL) | |
227 | return (-EBADF); | |
228 | ||
bc29124b DB |
229 | if (dst_file->f_op != src_file->f_op) { |
230 | fput(src_file); | |
9927f219 | 231 | return (-EXDEV); |
bc29124b | 232 | } |
9927f219 RN |
233 | |
234 | size_t len = i_size_read(file_inode(src_file)); | |
235 | ||
6dccdf50 | 236 | ssize_t ret = zpl_clone_file_range_impl(src_file, 0, dst_file, 0, len); |
9927f219 RN |
237 | |
238 | fput(src_file); | |
239 | ||
240 | if (ret < 0) { | |
241 | if (ret == -EOPNOTSUPP) | |
242 | return (-ENOTTY); | |
243 | return (ret); | |
244 | } | |
245 | ||
246 | if (ret != len) | |
247 | return (-EINVAL); | |
248 | ||
249 | return (0); | |
250 | } | |
251 | ||
252 | /* Entry point for FICLONERANGE, before Linux 4.5. */ | |
253 | long | |
254 | zpl_ioctl_ficlonerange(struct file *dst_file, void __user *arg) | |
255 | { | |
256 | zfs_ioc_compat_file_clone_range_t fcr; | |
257 | ||
258 | if (copy_from_user(&fcr, arg, sizeof (fcr))) | |
259 | return (-EFAULT); | |
260 | ||
261 | struct file *src_file = fget(fcr.fcr_src_fd); | |
262 | if (src_file == NULL) | |
263 | return (-EBADF); | |
264 | ||
bc29124b DB |
265 | if (dst_file->f_op != src_file->f_op) { |
266 | fput(src_file); | |
9927f219 | 267 | return (-EXDEV); |
bc29124b | 268 | } |
9927f219 RN |
269 | |
270 | size_t len = fcr.fcr_src_length; | |
271 | if (len == 0) | |
272 | len = i_size_read(file_inode(src_file)) - fcr.fcr_src_offset; | |
273 | ||
6dccdf50 | 274 | ssize_t ret = zpl_clone_file_range_impl(src_file, fcr.fcr_src_offset, |
9927f219 RN |
275 | dst_file, fcr.fcr_dest_offset, len); |
276 | ||
277 | fput(src_file); | |
278 | ||
279 | if (ret < 0) { | |
280 | if (ret == -EOPNOTSUPP) | |
281 | return (-ENOTTY); | |
282 | return (ret); | |
283 | } | |
284 | ||
285 | if (ret != len) | |
286 | return (-EINVAL); | |
287 | ||
288 | return (0); | |
289 | } | |
290 | ||
291 | /* Entry point for FIDEDUPERANGE, before Linux 4.5. */ | |
292 | long | |
293 | zpl_ioctl_fideduperange(struct file *filp, void *arg) | |
294 | { | |
295 | (void) arg; | |
296 | ||
297 | /* No support for dedup yet */ | |
298 | return (-ENOTTY); | |
299 | } |