]>
Commit | Line | Data |
---|---|---|
1efb473f BB |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
1d3ba0bf | 9 | * or https://opensource.org/licenses/CDDL-1.0. |
1efb473f BB |
10 | * See the License for the specific language governing permissions |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | /* | |
22 | * Copyright (c) 2011, Lawrence Livermore National Security, LLC. | |
5475aada | 23 | * Copyright (c) 2015 by Chunwei Chen. All rights reserved. |
1efb473f BB |
24 | */ |
25 | ||
26 | ||
f7b939bd CIK |
27 | #ifdef CONFIG_COMPAT |
28 | #include <linux/compat.h> | |
29 | #endif | |
320f0c60 | 30 | #include <linux/fs.h> |
93ce2b4c | 31 | #include <sys/file.h> |
119a394a | 32 | #include <sys/dmu_objset.h> |
657ce253 | 33 | #include <sys/zfs_znode.h> |
1efb473f BB |
34 | #include <sys/zfs_vfsops.h> |
35 | #include <sys/zfs_vnops.h> | |
9c5167d1 | 36 | #include <sys/zfs_project.h> |
7dde17e8 SP |
37 | #if defined(HAVE_VFS_SET_PAGE_DIRTY_NOBUFFERS) || \ |
38 | defined(HAVE_VFS_FILEMAP_DIRTY_FOLIO) | |
1c24bf96 CK |
39 | #include <linux/pagemap.h> |
40 | #endif | |
320f0c60 FY |
41 | #ifdef HAVE_FILE_FADVISE |
42 | #include <linux/fadvise.h> | |
43 | #endif | |
7dde17e8 SP |
44 | #ifdef HAVE_VFS_FILEMAP_DIRTY_FOLIO |
45 | #include <linux/writeback.h> | |
46 | #endif | |
1efb473f | 47 | |
f734301d AD |
/*
 * When using fallocate(2) to preallocate space, inflate the requested
 * capacity check by 10% to account for the required metadata blocks.
 *
 * The value is expressed as a percentage of the requested length, so the
 * default of 110 corresponds to the 10% inflation described above.
 * NOTE(review): the code consuming this tunable is not visible here;
 * confirm exactly how the percentage is applied.
 */
static unsigned int zfs_fallocate_reserve_percent = 110;
1efb473f | 53 | |
126400a1 BB |
54 | static int |
55 | zpl_open(struct inode *ip, struct file *filp) | |
56 | { | |
81e97e21 | 57 | cred_t *cr = CRED(); |
126400a1 | 58 | int error; |
40d06e3c | 59 | fstrans_cookie_t cookie; |
126400a1 | 60 | |
7dc71949 CC |
61 | error = generic_file_open(ip, filp); |
62 | if (error) | |
63 | return (error); | |
64 | ||
81e97e21 | 65 | crhold(cr); |
40d06e3c | 66 | cookie = spl_fstrans_mark(); |
126400a1 | 67 | error = -zfs_open(ip, filp->f_mode, filp->f_flags, cr); |
40d06e3c | 68 | spl_fstrans_unmark(cookie); |
81e97e21 | 69 | crfree(cr); |
126400a1 BB |
70 | ASSERT3S(error, <=, 0); |
71 | ||
7dc71949 | 72 | return (error); |
126400a1 BB |
73 | } |
74 | ||
75 | static int | |
76 | zpl_release(struct inode *ip, struct file *filp) | |
77 | { | |
81e97e21 | 78 | cred_t *cr = CRED(); |
126400a1 | 79 | int error; |
40d06e3c | 80 | fstrans_cookie_t cookie; |
126400a1 | 81 | |
40d06e3c | 82 | cookie = spl_fstrans_mark(); |
78d7a5d7 | 83 | if (ITOZ(ip)->z_atime_dirty) |
1e8db771 | 84 | zfs_mark_inode_dirty(ip); |
78d7a5d7 | 85 | |
81e97e21 | 86 | crhold(cr); |
126400a1 | 87 | error = -zfs_close(ip, filp->f_flags, cr); |
40d06e3c | 88 | spl_fstrans_unmark(cookie); |
81e97e21 | 89 | crfree(cr); |
126400a1 BB |
90 | ASSERT3S(error, <=, 0); |
91 | ||
92 | return (error); | |
93 | } | |
94 | ||
1efb473f | 95 | static int |
9464b959 | 96 | zpl_iterate(struct file *filp, zpl_dir_context_t *ctx) |
1efb473f | 97 | { |
81e97e21 | 98 | cred_t *cr = CRED(); |
1efb473f | 99 | int error; |
40d06e3c | 100 | fstrans_cookie_t cookie; |
1efb473f | 101 | |
81e97e21 | 102 | crhold(cr); |
40d06e3c | 103 | cookie = spl_fstrans_mark(); |
d9c97ec0 | 104 | error = -zfs_readdir(file_inode(filp), ctx, cr); |
40d06e3c | 105 | spl_fstrans_unmark(cookie); |
81e97e21 | 106 | crfree(cr); |
1efb473f BB |
107 | ASSERT3S(error, <=, 0); |
108 | ||
109 | return (error); | |
110 | } | |
111 | ||
9baaa7de | 112 | #if !defined(HAVE_VFS_ITERATE) && !defined(HAVE_VFS_ITERATE_SHARED) |
0f37d0c8 RY |
113 | static int |
114 | zpl_readdir(struct file *filp, void *dirent, filldir_t filldir) | |
115 | { | |
9464b959 BB |
116 | zpl_dir_context_t ctx = |
117 | ZPL_DIR_CONTEXT_INIT(dirent, filldir, filp->f_pos); | |
0f37d0c8 RY |
118 | int error; |
119 | ||
120 | error = zpl_iterate(filp, &ctx); | |
121 | filp->f_pos = ctx.pos; | |
122 | ||
123 | return (error); | |
124 | } | |
9464b959 | 125 | #endif /* !HAVE_VFS_ITERATE && !HAVE_VFS_ITERATE_SHARED */ |
0f37d0c8 | 126 | |
066e8252 | 127 | #if defined(HAVE_FSYNC_WITHOUT_DENTRY) |
adcd70bd BB |
128 | /* |
129 | * Linux 2.6.35 - 3.0 API, | |
130 | * As of 2.6.35 the dentry argument to the fops->fsync() hook was deemed | |
3117dd0b BB |
131 | * redundant. The dentry is still accessible via filp->f_path.dentry, |
132 | * and we are guaranteed that filp will never be NULL. | |
3117dd0b | 133 | */ |
3117dd0b BB |
134 | static int |
135 | zpl_fsync(struct file *filp, int datasync) | |
136 | { | |
adcd70bd BB |
137 | struct inode *inode = filp->f_mapping->host; |
138 | cred_t *cr = CRED(); | |
139 | int error; | |
40d06e3c | 140 | fstrans_cookie_t cookie; |
adcd70bd BB |
141 | |
142 | crhold(cr); | |
40d06e3c | 143 | cookie = spl_fstrans_mark(); |
657ce253 | 144 | error = -zfs_fsync(ITOZ(inode), datasync, cr); |
40d06e3c | 145 | spl_fstrans_unmark(cookie); |
adcd70bd BB |
146 | crfree(cr); |
147 | ASSERT3S(error, <=, 0); | |
148 | ||
149 | return (error); | |
150 | } | |
151 | ||
7ca25051 | 152 | #ifdef HAVE_FILE_AIO_FSYNC |
cd3939c5 RY |
153 | static int |
154 | zpl_aio_fsync(struct kiocb *kiocb, int datasync) | |
155 | { | |
156 | return (zpl_fsync(kiocb->ki_filp, datasync)); | |
157 | } | |
7ca25051 D |
158 | #endif |
159 | ||
adcd70bd BB |
160 | #elif defined(HAVE_FSYNC_RANGE) |
161 | /* | |
69cbd0a3 | 162 | * Linux 3.1 API, |
adcd70bd BB |
163 | * As of 3.1 the responsibility to call filemap_write_and_wait_range() has |
164 | * been pushed down in to the .fsync() vfs hook. Additionally, the i_mutex | |
165 | * lock is no longer held by the caller, for zfs we don't require the lock | |
166 | * to be held so we don't acquire it. | |
167 | */ | |
3117dd0b | 168 | static int |
adcd70bd | 169 | zpl_fsync(struct file *filp, loff_t start, loff_t end, int datasync) |
1efb473f | 170 | { |
adcd70bd | 171 | struct inode *inode = filp->f_mapping->host; |
411f4a01 SN |
172 | znode_t *zp = ITOZ(inode); |
173 | zfsvfs_t *zfsvfs = ITOZSB(inode); | |
81e97e21 | 174 | cred_t *cr = CRED(); |
1efb473f | 175 | int error; |
40d06e3c | 176 | fstrans_cookie_t cookie; |
1efb473f | 177 | |
411f4a01 SN |
178 | /* |
179 | * The variables z_sync_writes_cnt and z_async_writes_cnt work in | |
180 | * tandem so that sync writes can detect if there are any non-sync | |
181 | * writes going on and vice-versa. The "vice-versa" part to this logic | |
182 | * is located in zfs_putpage() where non-sync writes check if there are | |
183 | * any ongoing sync writes. If any sync and non-sync writes overlap, | |
184 | * we do a commit to complete the non-sync writes since the latter can | |
185 | * potentially take several seconds to complete and thus block sync | |
186 | * writes in the upcoming call to filemap_write_and_wait_range(). | |
187 | */ | |
188 | atomic_inc_32(&zp->z_sync_writes_cnt); | |
189 | /* | |
190 | * If the following check does not detect an overlapping non-sync write | |
191 | * (say because it's just about to start), then it is guaranteed that | |
192 | * the non-sync write will detect this sync write. This is because we | |
193 | * always increment z_sync_writes_cnt / z_async_writes_cnt before doing | |
194 | * the check on z_async_writes_cnt / z_sync_writes_cnt here and in | |
195 | * zfs_putpage() respectively. | |
196 | */ | |
197 | if (atomic_load_32(&zp->z_async_writes_cnt) > 0) { | |
768eaced CC |
198 | if ((error = zpl_enter(zfsvfs, FTAG)) != 0) { |
199 | atomic_dec_32(&zp->z_sync_writes_cnt); | |
200 | return (error); | |
201 | } | |
411f4a01 | 202 | zil_commit(zfsvfs->z_log, zp->z_id); |
768eaced | 203 | zpl_exit(zfsvfs, FTAG); |
411f4a01 SN |
204 | } |
205 | ||
adcd70bd | 206 | error = filemap_write_and_wait_range(inode->i_mapping, start, end); |
411f4a01 SN |
207 | |
208 | /* | |
209 | * The sync write is not complete yet but we decrement | |
210 | * z_sync_writes_cnt since zfs_fsync() increments and decrements | |
211 | * it internally. If a non-sync write starts just after the decrement | |
212 | * operation but before we call zfs_fsync(), it may not detect this | |
213 | * overlapping sync write but it does not matter since we have already | |
214 | * gone past filemap_write_and_wait_range() and we won't block due to | |
215 | * the non-sync write. | |
216 | */ | |
217 | atomic_dec_32(&zp->z_sync_writes_cnt); | |
218 | ||
adcd70bd BB |
219 | if (error) |
220 | return (error); | |
221 | ||
81e97e21 | 222 | crhold(cr); |
40d06e3c | 223 | cookie = spl_fstrans_mark(); |
411f4a01 | 224 | error = -zfs_fsync(zp, datasync, cr); |
40d06e3c | 225 | spl_fstrans_unmark(cookie); |
81e97e21 | 226 | crfree(cr); |
1efb473f BB |
227 | ASSERT3S(error, <=, 0); |
228 | ||
229 | return (error); | |
230 | } | |
cd3939c5 | 231 | |
7ca25051 | 232 | #ifdef HAVE_FILE_AIO_FSYNC |
cd3939c5 RY |
233 | static int |
234 | zpl_aio_fsync(struct kiocb *kiocb, int datasync) | |
235 | { | |
57ae8400 | 236 | return (zpl_fsync(kiocb->ki_filp, kiocb->ki_pos, -1, datasync)); |
cd3939c5 | 237 | } |
7ca25051 D |
238 | #endif |
239 | ||
adcd70bd BB |
240 | #else |
241 | #error "Unsupported fops->fsync() implementation" | |
242 | #endif | |
1efb473f | 243 | |
b1b94e96 GW |
/*
 * Translate the per-request IOCB_* bits carried in kiocb->ki_flags into
 * the matching O_* open flags.  Each translation is compiled in only when
 * the corresponding IOCB_* macro is defined.
 */
static inline int
zfs_io_flags(struct kiocb *kiocb)
{
	int oflags = 0;

#if defined(IOCB_DSYNC)
	if (kiocb->ki_flags & IOCB_DSYNC) {
		oflags |= O_DSYNC;
	}
#endif
#if defined(IOCB_SYNC)
	if (kiocb->ki_flags & IOCB_SYNC) {
		oflags |= O_SYNC;
	}
#endif
#if defined(IOCB_APPEND)
	if (kiocb->ki_flags & IOCB_APPEND) {
		oflags |= O_APPEND;
	}
#endif
#if defined(IOCB_DIRECT)
	if (kiocb->ki_flags & IOCB_DIRECT) {
		oflags |= O_DIRECT;
	}
#endif
	return (oflags);
}
267 | ||
1c2358c1 BB |
268 | /* |
269 | * If relatime is enabled, call file_accessed() if zfs_relatime_need_update() | |
270 | * is true. This is needed since datasets with inherited "relatime" property | |
271 | * aren't necessarily mounted with the MNT_RELATIME flag (e.g. after | |
272 | * `zfs set relatime=...`), which is what relatime test in VFS by | |
273 | * relatime_need_update() is based on. | |
274 | */ | |
275 | static inline void | |
276 | zpl_file_accessed(struct file *filp) | |
1efb473f | 277 | { |
1c2358c1 | 278 | struct inode *ip = filp->f_mapping->host; |
e3dc14b8 | 279 | |
1c2358c1 BB |
280 | if (!IS_NOATIME(ip) && ITOZSB(ip)->z_relatime) { |
281 | if (zfs_relatime_need_update(ip)) | |
282 | file_accessed(filp); | |
283 | } else { | |
284 | file_accessed(filp); | |
285 | } | |
1efb473f BB |
286 | } |
287 | ||
1c2358c1 | 288 | #if defined(HAVE_VFS_RW_ITERATE) |
cd3939c5 | 289 | |
1c2358c1 BB |
290 | /* |
291 | * When HAVE_VFS_IOV_ITER is defined the iov_iter structure supports | |
292 | * iovecs, kvecs, bvecs and pipes, plus all the required interfaces to | |
293 | * manipulate the iov_iter are available. In which case the full iov_iter | |
294 | * can be attached to the uio and correctly handled in the lower layers. | |
295 | * Otherwise, for older kernels extract the iovec and pass it instead. | |
296 | */ | |
297 | static void | |
d0cd9a5c | 298 | zpl_uio_init(zfs_uio_t *uio, struct kiocb *kiocb, struct iov_iter *to, |
1c2358c1 BB |
299 | loff_t pos, ssize_t count, size_t skip) |
300 | { | |
301 | #if defined(HAVE_VFS_IOV_ITER) | |
d0cd9a5c | 302 | zfs_uio_iov_iter_init(uio, to, pos, count, skip); |
f6e22561 CK |
303 | #else |
304 | #ifdef HAVE_IOV_ITER_TYPE | |
305 | zfs_uio_iovec_init(uio, to->iov, to->nr_segs, pos, | |
306 | iov_iter_type(to) & ITER_KVEC ? UIO_SYSSPACE : UIO_USERSPACE, | |
307 | count, skip); | |
1c2358c1 | 308 | #else |
d0cd9a5c | 309 | zfs_uio_iovec_init(uio, to->iov, to->nr_segs, pos, |
1c2358c1 BB |
310 | to->type & ITER_KVEC ? UIO_SYSSPACE : UIO_USERSPACE, |
311 | count, skip); | |
312 | #endif | |
f6e22561 | 313 | #endif |
cd3939c5 RY |
314 | } |
315 | ||
cd3939c5 | 316 | static ssize_t |
1c2358c1 | 317 | zpl_iter_read(struct kiocb *kiocb, struct iov_iter *to) |
cd3939c5 RY |
318 | { |
319 | cred_t *cr = CRED(); | |
1c2358c1 | 320 | fstrans_cookie_t cookie; |
cd3939c5 | 321 | struct file *filp = kiocb->ki_filp; |
1c2358c1 | 322 | ssize_t count = iov_iter_count(to); |
d0cd9a5c | 323 | zfs_uio_t uio; |
1c2358c1 BB |
324 | |
325 | zpl_uio_init(&uio, kiocb, to, kiocb->ki_pos, count, 0); | |
cd3939c5 RY |
326 | |
327 | crhold(cr); | |
1c2358c1 BB |
328 | cookie = spl_fstrans_mark(); |
329 | ||
330 | int error = -zfs_read(ITOZ(filp->f_mapping->host), &uio, | |
331 | filp->f_flags | zfs_io_flags(kiocb), cr); | |
332 | ||
333 | spl_fstrans_unmark(cookie); | |
cd3939c5 RY |
334 | crfree(cr); |
335 | ||
1c2358c1 BB |
336 | if (error < 0) |
337 | return (error); | |
338 | ||
339 | ssize_t read = count - uio.uio_resid; | |
340 | kiocb->ki_pos += read; | |
341 | ||
342 | zpl_file_accessed(filp); | |
343 | ||
1efb473f BB |
344 | return (read); |
345 | } | |
346 | ||
1c2358c1 BB |
347 | static inline ssize_t |
348 | zpl_generic_write_checks(struct kiocb *kiocb, struct iov_iter *from, | |
349 | size_t *countp) | |
57ae8400 | 350 | { |
1c2358c1 BB |
351 | #ifdef HAVE_GENERIC_WRITE_CHECKS_KIOCB |
352 | ssize_t ret = generic_write_checks(kiocb, from); | |
353 | if (ret <= 0) | |
354 | return (ret); | |
355 | ||
356 | *countp = ret; | |
57ae8400 | 357 | #else |
1c2358c1 BB |
358 | struct file *file = kiocb->ki_filp; |
359 | struct address_space *mapping = file->f_mapping; | |
360 | struct inode *ip = mapping->host; | |
361 | int isblk = S_ISBLK(ip->i_mode); | |
933ec999 | 362 | |
1c2358c1 BB |
363 | *countp = iov_iter_count(from); |
364 | ssize_t ret = generic_write_checks(file, &kiocb->ki_pos, countp, isblk); | |
933ec999 CC |
365 | if (ret) |
366 | return (ret); | |
1c2358c1 | 367 | #endif |
933ec999 | 368 | |
1c2358c1 | 369 | return (0); |
57ae8400 | 370 | } |
57ae8400 | 371 | |
5475aada | 372 | static ssize_t |
1c2358c1 | 373 | zpl_iter_write(struct kiocb *kiocb, struct iov_iter *from) |
1efb473f | 374 | { |
1c2358c1 | 375 | cred_t *cr = CRED(); |
40d06e3c | 376 | fstrans_cookie_t cookie; |
1c2358c1 BB |
377 | struct file *filp = kiocb->ki_filp; |
378 | struct inode *ip = filp->f_mapping->host; | |
d0cd9a5c | 379 | zfs_uio_t uio; |
8947fa44 | 380 | size_t count = 0; |
1c2358c1 | 381 | ssize_t ret; |
1efb473f | 382 | |
1c2358c1 BB |
383 | ret = zpl_generic_write_checks(kiocb, from, &count); |
384 | if (ret) | |
385 | return (ret); | |
1efdc45e | 386 | |
1c2358c1 | 387 | zpl_uio_init(&uio, kiocb, from, kiocb->ki_pos, count, from->iov_offset); |
1efb473f | 388 | |
1c2358c1 | 389 | crhold(cr); |
40d06e3c | 390 | cookie = spl_fstrans_mark(); |
1c2358c1 BB |
391 | |
392 | int error = -zfs_write(ITOZ(ip), &uio, | |
393 | filp->f_flags | zfs_io_flags(kiocb), cr); | |
394 | ||
40d06e3c | 395 | spl_fstrans_unmark(cookie); |
1c2358c1 BB |
396 | crfree(cr); |
397 | ||
1efb473f BB |
398 | if (error < 0) |
399 | return (error); | |
400 | ||
1c2358c1 BB |
401 | ssize_t wrote = count - uio.uio_resid; |
402 | kiocb->ki_pos += wrote; | |
403 | ||
e3dc14b8 | 404 | return (wrote); |
1efb473f | 405 | } |
933ec999 | 406 | |
1c2358c1 | 407 | #else /* !HAVE_VFS_RW_ITERATE */ |
1efb473f | 408 | |
cd3939c5 | 409 | static ssize_t |
1c2358c1 BB |
410 | zpl_aio_read(struct kiocb *kiocb, const struct iovec *iov, |
411 | unsigned long nr_segs, loff_t pos) | |
cd3939c5 RY |
412 | { |
413 | cred_t *cr = CRED(); | |
1c2358c1 | 414 | fstrans_cookie_t cookie; |
cd3939c5 | 415 | struct file *filp = kiocb->ki_filp; |
933ec999 | 416 | size_t count; |
5475aada | 417 | ssize_t ret; |
933ec999 | 418 | |
1c2358c1 | 419 | ret = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE); |
c7af63d6 CC |
420 | if (ret) |
421 | return (ret); | |
933ec999 | 422 | |
d0cd9a5c BA |
423 | zfs_uio_t uio; |
424 | zfs_uio_iovec_init(&uio, iov, nr_segs, kiocb->ki_pos, UIO_USERSPACE, | |
1c2358c1 | 425 | count, 0); |
933ec999 | 426 | |
1c2358c1 BB |
427 | crhold(cr); |
428 | cookie = spl_fstrans_mark(); | |
429 | ||
430 | int error = -zfs_read(ITOZ(filp->f_mapping->host), &uio, | |
431 | filp->f_flags | zfs_io_flags(kiocb), cr); | |
432 | ||
433 | spl_fstrans_unmark(cookie); | |
434 | crfree(cr); | |
435 | ||
436 | if (error < 0) | |
437 | return (error); | |
438 | ||
439 | ssize_t read = count - uio.uio_resid; | |
440 | kiocb->ki_pos += read; | |
933ec999 | 441 | |
1c2358c1 BB |
442 | zpl_file_accessed(filp); |
443 | ||
444 | return (read); | |
57ae8400 | 445 | } |
1c2358c1 | 446 | |
57ae8400 | 447 | static ssize_t |
1c2358c1 | 448 | zpl_aio_write(struct kiocb *kiocb, const struct iovec *iov, |
57ae8400 MK |
449 | unsigned long nr_segs, loff_t pos) |
450 | { | |
1c2358c1 BB |
451 | cred_t *cr = CRED(); |
452 | fstrans_cookie_t cookie; | |
453 | struct file *filp = kiocb->ki_filp; | |
454 | struct inode *ip = filp->f_mapping->host; | |
933ec999 CC |
455 | size_t count; |
456 | ssize_t ret; | |
457 | ||
1c2358c1 | 458 | ret = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ); |
933ec999 CC |
459 | if (ret) |
460 | return (ret); | |
461 | ||
1c2358c1 | 462 | ret = generic_write_checks(filp, &pos, &count, S_ISBLK(ip->i_mode)); |
933ec999 CC |
463 | if (ret) |
464 | return (ret); | |
465 | ||
63b18e40 BB |
466 | kiocb->ki_pos = pos; |
467 | ||
d0cd9a5c BA |
468 | zfs_uio_t uio; |
469 | zfs_uio_iovec_init(&uio, iov, nr_segs, kiocb->ki_pos, UIO_USERSPACE, | |
1c2358c1 BB |
470 | count, 0); |
471 | ||
472 | crhold(cr); | |
473 | cookie = spl_fstrans_mark(); | |
474 | ||
475 | int error = -zfs_write(ITOZ(ip), &uio, | |
476 | filp->f_flags | zfs_io_flags(kiocb), cr); | |
477 | ||
478 | spl_fstrans_unmark(cookie); | |
479 | crfree(cr); | |
480 | ||
481 | if (error < 0) | |
482 | return (error); | |
483 | ||
484 | ssize_t wrote = count - uio.uio_resid; | |
485 | kiocb->ki_pos += wrote; | |
486 | ||
487 | return (wrote); | |
57ae8400 MK |
488 | } |
489 | #endif /* HAVE_VFS_RW_ITERATE */ | |
490 | ||
a584ef26 BB |
491 | #if defined(HAVE_VFS_RW_ITERATE) |
492 | static ssize_t | |
493 | zpl_direct_IO_impl(int rw, struct kiocb *kiocb, struct iov_iter *iter) | |
494 | { | |
495 | if (rw == WRITE) | |
496 | return (zpl_iter_write(kiocb, iter)); | |
497 | else | |
498 | return (zpl_iter_read(kiocb, iter)); | |
499 | } | |
500 | #if defined(HAVE_VFS_DIRECT_IO_ITER) | |
501 | static ssize_t | |
502 | zpl_direct_IO(struct kiocb *kiocb, struct iov_iter *iter) | |
503 | { | |
504 | return (zpl_direct_IO_impl(iov_iter_rw(iter), kiocb, iter)); | |
505 | } | |
506 | #elif defined(HAVE_VFS_DIRECT_IO_ITER_OFFSET) | |
507 | static ssize_t | |
508 | zpl_direct_IO(struct kiocb *kiocb, struct iov_iter *iter, loff_t pos) | |
509 | { | |
510 | ASSERT3S(pos, ==, kiocb->ki_pos); | |
511 | return (zpl_direct_IO_impl(iov_iter_rw(iter), kiocb, iter)); | |
512 | } | |
513 | #elif defined(HAVE_VFS_DIRECT_IO_ITER_RW_OFFSET) | |
514 | static ssize_t | |
515 | zpl_direct_IO(int rw, struct kiocb *kiocb, struct iov_iter *iter, loff_t pos) | |
516 | { | |
517 | ASSERT3S(pos, ==, kiocb->ki_pos); | |
518 | return (zpl_direct_IO_impl(rw, kiocb, iter)); | |
519 | } | |
520 | #else | |
521 | #error "Unknown direct IO interface" | |
522 | #endif | |
523 | ||
064c2cf4 | 524 | #else /* HAVE_VFS_RW_ITERATE */ |
a584ef26 BB |
525 | |
526 | #if defined(HAVE_VFS_DIRECT_IO_IOVEC) | |
527 | static ssize_t | |
1c2358c1 | 528 | zpl_direct_IO(int rw, struct kiocb *kiocb, const struct iovec *iov, |
a584ef26 BB |
529 | loff_t pos, unsigned long nr_segs) |
530 | { | |
531 | if (rw == WRITE) | |
1c2358c1 | 532 | return (zpl_aio_write(kiocb, iov, nr_segs, pos)); |
a584ef26 | 533 | else |
1c2358c1 | 534 | return (zpl_aio_read(kiocb, iov, nr_segs, pos)); |
a584ef26 | 535 | } |
064c2cf4 KK |
536 | #elif defined(HAVE_VFS_DIRECT_IO_ITER_RW_OFFSET) |
537 | static ssize_t | |
538 | zpl_direct_IO(int rw, struct kiocb *kiocb, struct iov_iter *iter, loff_t pos) | |
539 | { | |
540 | const struct iovec *iovp = iov_iter_iovec(iter); | |
541 | unsigned long nr_segs = iter->nr_segs; | |
542 | ||
543 | ASSERT3S(pos, ==, kiocb->ki_pos); | |
544 | if (rw == WRITE) | |
545 | return (zpl_aio_write(kiocb, iovp, nr_segs, pos)); | |
546 | else | |
547 | return (zpl_aio_read(kiocb, iovp, nr_segs, pos)); | |
548 | } | |
a584ef26 BB |
549 | #else |
550 | #error "Unknown direct IO interface" | |
551 | #endif | |
552 | ||
553 | #endif /* HAVE_VFS_RW_ITERATE */ | |
554 | ||
802e7b5f LD |
555 | static loff_t |
556 | zpl_llseek(struct file *filp, loff_t offset, int whence) | |
557 | { | |
558 | #if defined(SEEK_HOLE) && defined(SEEK_DATA) | |
40d06e3c TC |
559 | fstrans_cookie_t cookie; |
560 | ||
802e7b5f LD |
561 | if (whence == SEEK_DATA || whence == SEEK_HOLE) { |
562 | struct inode *ip = filp->f_mapping->host; | |
563 | loff_t maxbytes = ip->i_sb->s_maxbytes; | |
564 | loff_t error; | |
565 | ||
9baaa7de | 566 | spl_inode_lock_shared(ip); |
40d06e3c | 567 | cookie = spl_fstrans_mark(); |
8583540c | 568 | error = -zfs_holey(ITOZ(ip), whence, &offset); |
40d06e3c | 569 | spl_fstrans_unmark(cookie); |
802e7b5f LD |
570 | if (error == 0) |
571 | error = lseek_execute(filp, ip, offset, maxbytes); | |
9baaa7de | 572 | spl_inode_unlock_shared(ip); |
802e7b5f LD |
573 | |
574 | return (error); | |
575 | } | |
576 | #endif /* SEEK_HOLE && SEEK_DATA */ | |
577 | ||
d1d7e268 | 578 | return (generic_file_llseek(filp, offset, whence)); |
802e7b5f LD |
579 | } |
580 | ||
c0d35759 BB |
581 | /* |
582 | * It's worth taking a moment to describe how mmap is implemented | |
583 | * for zfs because it differs considerably from other Linux filesystems. | |
584 | * However, this issue is handled the same way under OpenSolaris. | |
585 | * | |
586 | * The issue is that by design zfs bypasses the Linux page cache and | |
587 | * leaves all caching up to the ARC. This has been shown to work | |
588 | * well for the common read(2)/write(2) case. However, mmap(2) | |
589 | * is a problem because it relies on being tightly integrated with the | |
590 | * page cache. To handle this we cache mmap'ed files twice, once in | |
591 | * the ARC and a second time in the page cache. The code is careful | |
592 | * to keep both copies synchronized. | |
593 | * | |
594 | * When a file with an mmap'ed region is written to using write(2) | |
595 | * both the data in the ARC and existing pages in the page cache | |
596 | * are updated. For a read(2) data will be read first from the page | |
597 | * cache then the ARC if needed. Neither a write(2) nor a read(2) will | |
598 | * ever result in new pages being added to the page cache. | |
599 | * | |
600 | * New pages are added to the page cache only via .readpage() which | |
601 | * is called when the vfs needs to read a page off disk to back the | |
602 | * virtual memory region. These pages may be modified without | |
603 | * notifying the ARC and will be written out periodically via | |
604 | * .writepage(). This will occur due to either a sync or the usual | |
605 | * page aging behavior. Note because a read(2) of a mmap'ed file | |
606 | * will always check the page cache first even when the ARC is out | |
607 | * of date correct data will still be returned. | |
608 | * | |
609 | * While this implementation ensures correct behavior it does have | |
610 | * some drawbacks. The most obvious of which is that it | |
611 | * increases the required memory footprint when accessing mmap'ed | |
612 | * files. It also adds additional complexity to the code keeping | |
613 | * both caches synchronized. | |
614 | * | |
615 | * Longer term it may be possible to cleanly resolve this wart by | |
616 | * mapping page cache pages directly on to the ARC buffers. The | |
617 | * Linux address space operations are flexible enough to allow | |
618 | * selection of which pages back a particular index. The trick | |
619 | * would be working out the details of which subsystem is in | |
620 | * charge, the ARC, the page cache, or both. It may also prove | |
621 | * helpful to move the ARC buffers to a scatter-gather lists | |
622 | * rather than a vmalloc'ed region. | |
623 | */ | |
624 | static int | |
625 | zpl_mmap(struct file *filp, struct vm_area_struct *vma) | |
626 | { | |
e2e7aa2d | 627 | struct inode *ip = filp->f_mapping->host; |
c0d35759 | 628 | int error; |
40d06e3c | 629 | fstrans_cookie_t cookie; |
c0d35759 | 630 | |
40d06e3c | 631 | cookie = spl_fstrans_mark(); |
e2e7aa2d BB |
632 | error = -zfs_map(ip, vma->vm_pgoff, (caddr_t *)vma->vm_start, |
633 | (size_t)(vma->vm_end - vma->vm_start), vma->vm_flags); | |
40d06e3c | 634 | spl_fstrans_unmark(cookie); |
e2e7aa2d BB |
635 | if (error) |
636 | return (error); | |
637 | ||
c0d35759 BB |
638 | error = generic_file_mmap(filp, vma); |
639 | if (error) | |
640 | return (error); | |
641 | ||
3fc92adc BB |
642 | #if !defined(HAVE_FILEMAP_RANGE_HAS_PAGE) |
643 | znode_t *zp = ITOZ(ip); | |
c0d35759 | 644 | mutex_enter(&zp->z_lock); |
18a2485f | 645 | zp->z_is_mapped = B_TRUE; |
c0d35759 | 646 | mutex_exit(&zp->z_lock); |
3fc92adc | 647 | #endif |
c0d35759 BB |
648 | |
649 | return (error); | |
650 | } | |
651 | ||
652 | /* | |
653 | * Populate a page with data for the Linux page cache. This function is | |
654 | * only used to support mmap(2). There will be an identical copy of the | |
655 | * data in the ARC which is kept up to date via .write() and .writepage(). | |
c0d35759 | 656 | */ |
23c13c7e AL |
657 | static inline int |
658 | zpl_readpage_common(struct page *pp) | |
c0d35759 BB |
659 | { |
660 | struct inode *ip; | |
dde471ef | 661 | struct page *pl[1]; |
c0d35759 | 662 | int error = 0; |
40d06e3c | 663 | fstrans_cookie_t cookie; |
c0d35759 BB |
664 | |
665 | ASSERT(PageLocked(pp)); | |
666 | ip = pp->mapping->host; | |
dde471ef | 667 | pl[0] = pp; |
c0d35759 | 668 | |
40d06e3c | 669 | cookie = spl_fstrans_mark(); |
dde471ef | 670 | error = -zfs_getpage(ip, pl, 1); |
40d06e3c | 671 | spl_fstrans_unmark(cookie); |
c0d35759 | 672 | |
dde471ef PJ |
673 | if (error) { |
674 | SetPageError(pp); | |
675 | ClearPageUptodate(pp); | |
676 | } else { | |
677 | ClearPageError(pp); | |
678 | SetPageUptodate(pp); | |
679 | flush_dcache_page(pp); | |
680 | } | |
c0d35759 | 681 | |
dde471ef | 682 | unlock_page(pp); |
d1d7e268 | 683 | return (error); |
dde471ef | 684 | } |
c0d35759 | 685 | |
c2c2e7bb BB |
686 | #ifdef HAVE_VFS_READ_FOLIO |
687 | static int | |
688 | zpl_read_folio(struct file *filp, struct folio *folio) | |
689 | { | |
690 | return (zpl_readpage_common(&folio->page)); | |
691 | } | |
692 | #else | |
23c13c7e AL |
/* readpage hook: forwards to zpl_readpage_common(); filp is unused. */
static int
zpl_readpage(struct file *filp, struct page *pp)
{
	int rc = zpl_readpage_common(pp);

	return (rc);
}
c2c2e7bb | 698 | #endif |
23c13c7e AL |
699 | |
/*
 * Filler-callback form of the page read: the opaque data argument is
 * ignored and the page is passed to zpl_readpage_common().
 */
static int
zpl_readpage_filler(void *data, struct page *pp)
{
	int rc = zpl_readpage_common(pp);

	return (rc);
}
705 | ||
f3ab88d6 BB |
706 | /* |
707 | * Populate a set of pages with data for the Linux page cache. This | |
708 | * function will only be called for read ahead and never for demand | |
709 | * paging. For simplicity, the code relies on read_cache_pages() to | |
710 | * correctly lock each page for IO and call zpl_readpage(). | |
711 | */ | |
7d524c06 | 712 | #ifdef HAVE_VFS_READPAGES |
f3ab88d6 BB |
713 | static int |
714 | zpl_readpages(struct file *filp, struct address_space *mapping, | |
4ea3f864 | 715 | struct list_head *pages, unsigned nr_pages) |
f3ab88d6 | 716 | { |
23c13c7e | 717 | return (read_cache_pages(mapping, pages, zpl_readpage_filler, NULL)); |
f3ab88d6 | 718 | } |
7d524c06 RS |
719 | #else |
720 | static void | |
721 | zpl_readahead(struct readahead_control *ractl) | |
722 | { | |
723 | struct page *page; | |
724 | ||
725 | while ((page = readahead_page(ractl)) != NULL) { | |
726 | int ret; | |
727 | ||
728 | ret = zpl_readpage_filler(NULL, page); | |
729 | put_page(page); | |
730 | if (ret) | |
731 | break; | |
732 | } | |
733 | } | |
734 | #endif | |
f3ab88d6 | 735 | |
65c7cc49 | 736 | static int |
dde471ef PJ |
737 | zpl_putpage(struct page *pp, struct writeback_control *wbc, void *data) |
738 | { | |
411f4a01 | 739 | boolean_t *for_sync = data; |
92119cc2 | 740 | fstrans_cookie_t cookie; |
3c0e5c0f BB |
741 | |
742 | ASSERT(PageLocked(pp)); | |
743 | ASSERT(!PageWriteback(pp)); | |
8630650a | 744 | |
92119cc2 | 745 | cookie = spl_fstrans_mark(); |
411f4a01 | 746 | (void) zfs_putpage(pp->mapping->host, pp, wbc, *for_sync); |
92119cc2 | 747 | spl_fstrans_unmark(cookie); |
c0d35759 | 748 | |
3c0e5c0f | 749 | return (0); |
dde471ef | 750 | } |
c0d35759 | 751 | |
dde471ef PJ |
752 | static int |
753 | zpl_writepages(struct address_space *mapping, struct writeback_control *wbc) | |
754 | { | |
119a394a | 755 | znode_t *zp = ITOZ(mapping->host); |
0037b49e | 756 | zfsvfs_t *zfsvfs = ITOZSB(mapping->host); |
119a394a ED |
757 | enum writeback_sync_modes sync_mode; |
758 | int result; | |
759 | ||
768eaced CC |
760 | if ((result = zpl_enter(zfsvfs, FTAG)) != 0) |
761 | return (result); | |
0037b49e | 762 | if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) |
119a394a | 763 | wbc->sync_mode = WB_SYNC_ALL; |
768eaced | 764 | zpl_exit(zfsvfs, FTAG); |
119a394a ED |
765 | sync_mode = wbc->sync_mode; |
766 | ||
767 | /* | |
768 | * We don't want to run write_cache_pages() in SYNC mode here, because | |
769 | * that would make putpage() wait for a single page to be committed to | |
770 | * disk every single time, resulting in atrocious performance. Instead | |
771 | * we run it once in non-SYNC mode so that the ZIL gets all the data, | |
772 | * and then we commit it all in one go. | |
773 | */ | |
411f4a01 | 774 | boolean_t for_sync = (sync_mode == WB_SYNC_ALL); |
119a394a | 775 | wbc->sync_mode = WB_SYNC_NONE; |
411f4a01 | 776 | result = write_cache_pages(mapping, wbc, zpl_putpage, &for_sync); |
119a394a | 777 | if (sync_mode != wbc->sync_mode) { |
768eaced CC |
778 | if ((result = zpl_enter_verify_zp(zfsvfs, zp, FTAG)) != 0) |
779 | return (result); | |
0037b49e BB |
780 | if (zfsvfs->z_log != NULL) |
781 | zil_commit(zfsvfs->z_log, zp->z_id); | |
768eaced | 782 | zpl_exit(zfsvfs, FTAG); |
119a394a ED |
783 | |
784 | /* | |
785 | * We need to call write_cache_pages() again (we can't just | |
786 | * return after the commit) because the previous call in | |
787 | * non-SYNC mode does not guarantee that we got all the dirty | |
788 | * pages (see the implementation of write_cache_pages() for | |
789 | * details). That being said, this is a no-op in most cases. | |
790 | */ | |
791 | wbc->sync_mode = sync_mode; | |
411f4a01 SN |
792 | result = write_cache_pages(mapping, wbc, zpl_putpage, |
793 | &for_sync); | |
119a394a ED |
794 | } |
795 | return (result); | |
c0d35759 BB |
796 | } |
797 | ||
798 | /* | |
799 | * Write out dirty pages to the ARC, this function is only required to | |
800 | * support mmap(2). Mapped pages may be dirtied by memory operations | |
801 | * which never call .write(). These dirty pages are kept in sync with | |
802 | * the ARC buffers via this hook. | |
c0d35759 BB |
803 | */ |
804 | static int | |
805 | zpl_writepage(struct page *pp, struct writeback_control *wbc) | |
806 | { | |
119a394a ED |
807 | if (ITOZSB(pp->mapping->host)->z_os->os_sync == ZFS_SYNC_ALWAYS) |
808 | wbc->sync_mode = WB_SYNC_ALL; | |
809 | ||
411f4a01 SN |
810 | boolean_t for_sync = (wbc->sync_mode == WB_SYNC_ALL); |
811 | ||
812 | return (zpl_putpage(pp, wbc, &for_sync)); | |
c0d35759 BB |
813 | } |
814 | ||
cb2d1901 | 815 | /* |
f734301d AD |
816 | * The flag combination which matches the behavior of zfs_space() is |
817 | * FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE. The FALLOC_FL_PUNCH_HOLE | |
223df016 | 818 | * flag was introduced in the 2.6.38 kernel. |
f734301d AD |
819 | * |
820 | * The original mode=0 (allocate space) behavior can be reasonably emulated | |
821 | * by checking if enough space exists and creating a sparse file, as real | |
822 | * persistent space reservation is not possible due to COW, snapshots, etc. | |
cb2d1901 | 823 | */ |
066e8252 | 824 | static long |
cb2d1901 ED |
825 | zpl_fallocate_common(struct inode *ip, int mode, loff_t offset, loff_t len) |
826 | { | |
223df016 | 827 | cred_t *cr = CRED(); |
223df016 | 828 | loff_t olen; |
40d06e3c | 829 | fstrans_cookie_t cookie; |
f734301d | 830 | int error = 0; |
223df016 | 831 | |
4372e96f RE |
832 | int test_mode = FALLOC_FL_PUNCH_HOLE; |
833 | #ifdef HAVE_FALLOC_FL_ZERO_RANGE | |
834 | test_mode |= FALLOC_FL_ZERO_RANGE; | |
835 | #endif | |
836 | ||
837 | if ((mode & ~(FALLOC_FL_KEEP_SIZE | test_mode)) != 0) | |
da92d5cb | 838 | return (-EOPNOTSUPP); |
cb2d1901 | 839 | |
223df016 TC |
840 | if (offset < 0 || len <= 0) |
841 | return (-EINVAL); | |
cb2d1901 | 842 | |
223df016 TC |
843 | spl_inode_lock(ip); |
844 | olen = i_size_read(ip); | |
cb2d1901 | 845 | |
9fa4db44 | 846 | crhold(cr); |
40d06e3c | 847 | cookie = spl_fstrans_mark(); |
4372e96f | 848 | if (mode & (test_mode)) { |
f734301d AD |
849 | flock64_t bf; |
850 | ||
b657f2c5 RE |
851 | if (mode & FALLOC_FL_KEEP_SIZE) { |
852 | if (offset > olen) | |
853 | goto out_unmark; | |
f734301d | 854 | |
b657f2c5 RE |
855 | if (offset + len > olen) |
856 | len = olen - offset; | |
857 | } | |
f734301d AD |
858 | bf.l_type = F_WRLCK; |
859 | bf.l_whence = SEEK_SET; | |
860 | bf.l_start = offset; | |
861 | bf.l_len = len; | |
862 | bf.l_pid = 0; | |
863 | ||
864 | error = -zfs_space(ITOZ(ip), F_FREESP, &bf, O_RDWR, offset, cr); | |
865 | } else if ((mode & ~FALLOC_FL_KEEP_SIZE) == 0) { | |
866 | unsigned int percent = zfs_fallocate_reserve_percent; | |
867 | struct kstatfs statfs; | |
868 | ||
869 | /* Legacy mode, disable fallocate compatibility. */ | |
870 | if (percent == 0) { | |
871 | error = -EOPNOTSUPP; | |
872 | goto out_unmark; | |
873 | } | |
874 | ||
875 | /* | |
876 | * Use zfs_statvfs() instead of dmu_objset_space() since it | |
877 | * also checks project quota limits, which are relevant here. | |
878 | */ | |
879 | error = zfs_statvfs(ip, &statfs); | |
880 | if (error) | |
881 | goto out_unmark; | |
882 | ||
883 | /* | |
884 | * Shrink available space a bit to account for overhead/races. | |
885 | * We know the product previously fit into availbytes from | |
886 | * dmu_objset_space(), so the smaller product will also fit. | |
887 | */ | |
888 | if (len > statfs.f_bavail * (statfs.f_bsize * 100 / percent)) { | |
889 | error = -ENOSPC; | |
890 | goto out_unmark; | |
891 | } | |
892 | if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > olen) | |
893 | error = zfs_freesp(ITOZ(ip), offset + len, 0, 0, FALSE); | |
894 | } | |
895 | out_unmark: | |
40d06e3c | 896 | spl_fstrans_unmark(cookie); |
223df016 | 897 | spl_inode_unlock(ip); |
cb2d1901 ED |
898 | |
899 | crfree(cr); | |
900 | ||
cb2d1901 ED |
901 | return (error); |
902 | } | |
903 | ||
cb2d1901 ED |
904 | static long |
905 | zpl_fallocate(struct file *filp, int mode, loff_t offset, loff_t len) | |
906 | { | |
d9c97ec0 | 907 | return zpl_fallocate_common(file_inode(filp), |
cb2d1901 ED |
908 | mode, offset, len); |
909 | } | |
cb2d1901 | 910 | |
3fa5266d RM |
911 | static int |
912 | zpl_ioctl_getversion(struct file *filp, void __user *arg) | |
913 | { | |
914 | uint32_t generation = file_inode(filp)->i_generation; | |
915 | ||
916 | return (copy_to_user(arg, &generation, sizeof (generation))); | |
917 | } | |
918 | ||
#ifdef HAVE_FILE_FADVISE
/*
 * file_operations.fadvise hook.
 *
 * POSIX_FADV_SEQUENTIAL and POSIX_FADV_WILLNEED trigger a dmu_prefetch()
 * of the requested range (and, when generic_fadvise() is available and
 * the znode has cached data in that range, the generic handling as
 * well). NORMAL, RANDOM, DONTNEED and NOREUSE are accepted but ignored.
 *
 * Returns 0 on success, -ESPIPE for FIFOs, -EINVAL for a negative
 * offset/len or an unknown advice value, or the error produced by
 * zpl_enter_verify_zp() / generic_fadvise().
 */
static int
zpl_fadvise(struct file *filp, loff_t offset, loff_t len, int advice)
{
	struct inode *ip = file_inode(filp);
	znode_t *zp = ITOZ(ip);
	zfsvfs_t *zfsvfs = ITOZSB(ip);
	objset_t *os = zfsvfs->z_os;
	int rc;

	if (S_ISFIFO(ip->i_mode))
		return (-ESPIPE);

	if (len < 0 || offset < 0)
		return (-EINVAL);

	rc = zpl_enter_verify_zp(zfsvfs, zp, FTAG);
	if (rc != 0)
		return (rc);

	switch (advice) {
	case POSIX_FADV_NORMAL:
	case POSIX_FADV_RANDOM:
	case POSIX_FADV_DONTNEED:
	case POSIX_FADV_NOREUSE:
		/* ignored for now */
		break;
	case POSIX_FADV_SEQUENTIAL:
	case POSIX_FADV_WILLNEED:
#ifdef HAVE_GENERIC_FADVISE
		if (zn_has_cached_data(zp, offset, offset + len - 1))
			rc = generic_fadvise(filp, offset, len, advice);
#endif
		/*
		 * A zero length is replaced by the distance from offset
		 * to the current file size. Otherwise the caller's size
		 * is passed on directly, but note that dmu_prefetch_max
		 * will effectively cap it. If there really is a larger
		 * sequential access pattern, perhaps dmu_zfetch will
		 * detect it.
		 */
		if (len == 0)
			len = i_size_read(ip) - offset;

		dmu_prefetch(os, zp->z_id, 0, offset, len,
		    ZIO_PRIORITY_ASYNC_READ);
		break;
	default:
		rc = -EINVAL;
		break;
	}

	/*
	 * NOTE(review): the filesystem is entered via zpl_enter_verify_zp()
	 * but left via zfs_exit(); elsewhere in this file the same enter
	 * call is paired with zpl_exit(). Confirm the pairing is intended.
	 */
	zfs_exit(zfsvfs, FTAG);

	return (rc);
}
#endif /* HAVE_FILE_FADVISE */
973 | ||
9c5167d1 NF |
974 | #define ZFS_FL_USER_VISIBLE (FS_FL_USER_VISIBLE | ZFS_PROJINHERIT_FL) |
975 | #define ZFS_FL_USER_MODIFIABLE (FS_FL_USER_MODIFIABLE | ZFS_PROJINHERIT_FL) | |
976 | ||
977 | static uint32_t | |
978 | __zpl_ioctl_getflags(struct inode *ip) | |
9d317793 | 979 | { |
9d317793 | 980 | uint64_t zfs_flags = ITOZ(ip)->z_pflags; |
9c5167d1 | 981 | uint32_t ioctl_flags = 0; |
9d317793 RY |
982 | |
983 | if (zfs_flags & ZFS_IMMUTABLE) | |
984 | ioctl_flags |= FS_IMMUTABLE_FL; | |
985 | ||
986 | if (zfs_flags & ZFS_APPENDONLY) | |
987 | ioctl_flags |= FS_APPEND_FL; | |
988 | ||
989 | if (zfs_flags & ZFS_NODUMP) | |
990 | ioctl_flags |= FS_NODUMP_FL; | |
991 | ||
9c5167d1 NF |
992 | if (zfs_flags & ZFS_PROJINHERIT) |
993 | ioctl_flags |= ZFS_PROJINHERIT_FL; | |
9d317793 | 994 | |
9c5167d1 NF |
995 | return (ioctl_flags & ZFS_FL_USER_VISIBLE); |
996 | } | |
9d317793 | 997 | |
9c5167d1 NF |
998 | /* |
999 | * Map zfs file z_pflags (xvattr_t) to linux file attributes. Only file | |
1000 | * attributes common to both Linux and Solaris are mapped. | |
1001 | */ | |
1002 | static int | |
1003 | zpl_ioctl_getflags(struct file *filp, void __user *arg) | |
1004 | { | |
1005 | uint32_t flags; | |
1006 | int err; | |
1007 | ||
1008 | flags = __zpl_ioctl_getflags(file_inode(filp)); | |
1009 | err = copy_to_user(arg, &flags, sizeof (flags)); | |
1010 | ||
1011 | return (err); | |
9d317793 RY |
1012 | } |
1013 | ||
/*
 * fchange(f0, f1, b0, b1) is non-zero exactly when "some bit of b0 is set
 * in f0" and "some bit of b1 is set in f1" disagree, i.e. when we have been
 * asked to change a flag. It is ugly, but having to do this is a
 * consequence of how the Linux file attribute interface was designed.
 * Another consequence is that concurrent modification of files suffers
 * from a TOCTOU race. Neither is something we can fix without modifying
 * the kernel-userland interface, which is outside of our jurisdiction.
 */

#define	fchange(f0, f1, b0, b1)	\
	((((f0) & (b0)) != 0) != (((f1) & (b1)) != 0))
9d317793 RY |
1024 | |
1025 | static int | |
9c5167d1 | 1026 | __zpl_ioctl_setflags(struct inode *ip, uint32_t ioctl_flags, xvattr_t *xva) |
9d317793 | 1027 | { |
9c5167d1 NF |
1028 | uint64_t zfs_flags = ITOZ(ip)->z_pflags; |
1029 | xoptattr_t *xoap; | |
9d317793 | 1030 | |
9c5167d1 NF |
1031 | if (ioctl_flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NODUMP_FL | |
1032 | ZFS_PROJINHERIT_FL)) | |
9d317793 RY |
1033 | return (-EOPNOTSUPP); |
1034 | ||
9c5167d1 | 1035 | if (ioctl_flags & ~ZFS_FL_USER_MODIFIABLE) |
9d317793 RY |
1036 | return (-EACCES); |
1037 | ||
1038 | if ((fchange(ioctl_flags, zfs_flags, FS_IMMUTABLE_FL, ZFS_IMMUTABLE) || | |
1039 | fchange(ioctl_flags, zfs_flags, FS_APPEND_FL, ZFS_APPENDONLY)) && | |
1040 | !capable(CAP_LINUX_IMMUTABLE)) | |
2037edbd | 1041 | return (-EPERM); |
9d317793 | 1042 | |
e2a82961 | 1043 | if (!zpl_inode_owner_or_capable(kcred->user_ns, ip)) |
9d317793 RY |
1044 | return (-EACCES); |
1045 | ||
9c5167d1 NF |
1046 | xva_init(xva); |
1047 | xoap = xva_getxoptattr(xva); | |
9d317793 | 1048 | |
39a4daf7 US |
1049 | #define FLAG_CHANGE(iflag, zflag, xflag, xfield) do { \ |
1050 | if (((ioctl_flags & (iflag)) && !(zfs_flags & (zflag))) || \ | |
1051 | ((zfs_flags & (zflag)) && !(ioctl_flags & (iflag)))) { \ | |
1052 | XVA_SET_REQ(xva, (xflag)); \ | |
1053 | (xfield) = ((ioctl_flags & (iflag)) != 0); \ | |
1054 | } \ | |
1055 | } while (0) | |
1056 | ||
1057 | FLAG_CHANGE(FS_IMMUTABLE_FL, ZFS_IMMUTABLE, XAT_IMMUTABLE, | |
1058 | xoap->xoa_immutable); | |
1059 | FLAG_CHANGE(FS_APPEND_FL, ZFS_APPENDONLY, XAT_APPENDONLY, | |
1060 | xoap->xoa_appendonly); | |
1061 | FLAG_CHANGE(FS_NODUMP_FL, ZFS_NODUMP, XAT_NODUMP, | |
1062 | xoap->xoa_nodump); | |
1063 | FLAG_CHANGE(ZFS_PROJINHERIT_FL, ZFS_PROJINHERIT, XAT_PROJINHERIT, | |
1064 | xoap->xoa_projinherit); | |
1065 | ||
1066 | #undef FLAG_CHANGE | |
9c5167d1 NF |
1067 | |
1068 | return (0); | |
1069 | } | |
1070 | ||
1071 | static int | |
1072 | zpl_ioctl_setflags(struct file *filp, void __user *arg) | |
1073 | { | |
1074 | struct inode *ip = file_inode(filp); | |
1075 | uint32_t flags; | |
1076 | cred_t *cr = CRED(); | |
1077 | xvattr_t xva; | |
1078 | int err; | |
1079 | fstrans_cookie_t cookie; | |
1080 | ||
1081 | if (copy_from_user(&flags, arg, sizeof (flags))) | |
1082 | return (-EFAULT); | |
1083 | ||
1084 | err = __zpl_ioctl_setflags(ip, flags, &xva); | |
1085 | if (err) | |
1086 | return (err); | |
1087 | ||
9d317793 | 1088 | crhold(cr); |
40d06e3c | 1089 | cookie = spl_fstrans_mark(); |
f224eddf | 1090 | err = -zfs_setattr(ITOZ(ip), (vattr_t *)&xva, 0, cr, kcred->user_ns); |
40d06e3c | 1091 | spl_fstrans_unmark(cookie); |
9d317793 RY |
1092 | crfree(cr); |
1093 | ||
9c5167d1 NF |
1094 | return (err); |
1095 | } | |
1096 | ||
1097 | static int | |
1098 | zpl_ioctl_getxattr(struct file *filp, void __user *arg) | |
1099 | { | |
1100 | zfsxattr_t fsx = { 0 }; | |
1101 | struct inode *ip = file_inode(filp); | |
1102 | int err; | |
1103 | ||
1104 | fsx.fsx_xflags = __zpl_ioctl_getflags(ip); | |
1105 | fsx.fsx_projid = ITOZ(ip)->z_projid; | |
1106 | err = copy_to_user(arg, &fsx, sizeof (fsx)); | |
1107 | ||
1108 | return (err); | |
1109 | } | |
1110 | ||
1111 | static int | |
1112 | zpl_ioctl_setxattr(struct file *filp, void __user *arg) | |
1113 | { | |
1114 | struct inode *ip = file_inode(filp); | |
1115 | zfsxattr_t fsx; | |
1116 | cred_t *cr = CRED(); | |
1117 | xvattr_t xva; | |
1118 | xoptattr_t *xoap; | |
1119 | int err; | |
1120 | fstrans_cookie_t cookie; | |
1121 | ||
1122 | if (copy_from_user(&fsx, arg, sizeof (fsx))) | |
1123 | return (-EFAULT); | |
1124 | ||
1125 | if (!zpl_is_valid_projid(fsx.fsx_projid)) | |
1126 | return (-EINVAL); | |
1127 | ||
1128 | err = __zpl_ioctl_setflags(ip, fsx.fsx_xflags, &xva); | |
1129 | if (err) | |
1130 | return (err); | |
1131 | ||
1132 | xoap = xva_getxoptattr(&xva); | |
1133 | XVA_SET_REQ(&xva, XAT_PROJID); | |
1134 | xoap->xoa_projid = fsx.fsx_projid; | |
1135 | ||
1136 | crhold(cr); | |
1137 | cookie = spl_fstrans_mark(); | |
f224eddf | 1138 | err = -zfs_setattr(ITOZ(ip), (vattr_t *)&xva, 0, cr, kcred->user_ns); |
9c5167d1 NF |
1139 | spl_fstrans_unmark(cookie); |
1140 | crfree(cr); | |
1141 | ||
1142 | return (err); | |
9d317793 RY |
1143 | } |
1144 | ||
39a4daf7 US |
1145 | /* |
1146 | * Expose Additional File Level Attributes of ZFS. | |
1147 | */ | |
1148 | static int | |
1149 | zpl_ioctl_getdosflags(struct file *filp, void __user *arg) | |
1150 | { | |
1151 | struct inode *ip = file_inode(filp); | |
1152 | uint64_t dosflags = ITOZ(ip)->z_pflags; | |
1153 | dosflags &= ZFS_DOS_FL_USER_VISIBLE; | |
1154 | int err = copy_to_user(arg, &dosflags, sizeof (dosflags)); | |
1155 | ||
1156 | return (err); | |
1157 | } | |
1158 | ||
1159 | static int | |
1160 | __zpl_ioctl_setdosflags(struct inode *ip, uint64_t ioctl_flags, xvattr_t *xva) | |
1161 | { | |
1162 | uint64_t zfs_flags = ITOZ(ip)->z_pflags; | |
1163 | xoptattr_t *xoap; | |
1164 | ||
1165 | if (ioctl_flags & (~ZFS_DOS_FL_USER_VISIBLE)) | |
1166 | return (-EOPNOTSUPP); | |
1167 | ||
1168 | if ((fchange(ioctl_flags, zfs_flags, ZFS_IMMUTABLE, ZFS_IMMUTABLE) || | |
1169 | fchange(ioctl_flags, zfs_flags, ZFS_APPENDONLY, ZFS_APPENDONLY)) && | |
1170 | !capable(CAP_LINUX_IMMUTABLE)) | |
1171 | return (-EPERM); | |
1172 | ||
1173 | if (!zpl_inode_owner_or_capable(kcred->user_ns, ip)) | |
1174 | return (-EACCES); | |
1175 | ||
1176 | xva_init(xva); | |
1177 | xoap = xva_getxoptattr(xva); | |
1178 | ||
1179 | #define FLAG_CHANGE(iflag, xflag, xfield) do { \ | |
1180 | if (((ioctl_flags & (iflag)) && !(zfs_flags & (iflag))) || \ | |
1181 | ((zfs_flags & (iflag)) && !(ioctl_flags & (iflag)))) { \ | |
1182 | XVA_SET_REQ(xva, (xflag)); \ | |
1183 | (xfield) = ((ioctl_flags & (iflag)) != 0); \ | |
1184 | } \ | |
1185 | } while (0) | |
1186 | ||
1187 | FLAG_CHANGE(ZFS_IMMUTABLE, XAT_IMMUTABLE, xoap->xoa_immutable); | |
1188 | FLAG_CHANGE(ZFS_APPENDONLY, XAT_APPENDONLY, xoap->xoa_appendonly); | |
1189 | FLAG_CHANGE(ZFS_NODUMP, XAT_NODUMP, xoap->xoa_nodump); | |
1190 | FLAG_CHANGE(ZFS_READONLY, XAT_READONLY, xoap->xoa_readonly); | |
1191 | FLAG_CHANGE(ZFS_HIDDEN, XAT_HIDDEN, xoap->xoa_hidden); | |
1192 | FLAG_CHANGE(ZFS_SYSTEM, XAT_SYSTEM, xoap->xoa_system); | |
1193 | FLAG_CHANGE(ZFS_ARCHIVE, XAT_ARCHIVE, xoap->xoa_archive); | |
1194 | FLAG_CHANGE(ZFS_NOUNLINK, XAT_NOUNLINK, xoap->xoa_nounlink); | |
1195 | FLAG_CHANGE(ZFS_REPARSE, XAT_REPARSE, xoap->xoa_reparse); | |
1196 | FLAG_CHANGE(ZFS_OFFLINE, XAT_OFFLINE, xoap->xoa_offline); | |
1197 | FLAG_CHANGE(ZFS_SPARSE, XAT_SPARSE, xoap->xoa_sparse); | |
1198 | ||
1199 | #undef FLAG_CHANGE | |
1200 | ||
1201 | return (0); | |
1202 | } | |
1203 | ||
1204 | /* | |
1205 | * Set Additional File Level Attributes of ZFS. | |
1206 | */ | |
1207 | static int | |
1208 | zpl_ioctl_setdosflags(struct file *filp, void __user *arg) | |
1209 | { | |
1210 | struct inode *ip = file_inode(filp); | |
1211 | uint64_t dosflags; | |
1212 | cred_t *cr = CRED(); | |
1213 | xvattr_t xva; | |
1214 | int err; | |
1215 | fstrans_cookie_t cookie; | |
1216 | ||
1217 | if (copy_from_user(&dosflags, arg, sizeof (dosflags))) | |
1218 | return (-EFAULT); | |
1219 | ||
1220 | err = __zpl_ioctl_setdosflags(ip, dosflags, &xva); | |
1221 | if (err) | |
1222 | return (err); | |
1223 | ||
1224 | crhold(cr); | |
1225 | cookie = spl_fstrans_mark(); | |
f224eddf | 1226 | err = -zfs_setattr(ITOZ(ip), (vattr_t *)&xva, 0, cr, kcred->user_ns); |
39a4daf7 US |
1227 | spl_fstrans_unmark(cookie); |
1228 | crfree(cr); | |
1229 | ||
1230 | return (err); | |
1231 | } | |
1232 | ||
88c28395 BB |
1233 | static long |
1234 | zpl_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |
1235 | { | |
1236 | switch (cmd) { | |
3fa5266d RM |
1237 | case FS_IOC_GETVERSION: |
1238 | return (zpl_ioctl_getversion(filp, (void *)arg)); | |
9d317793 RY |
1239 | case FS_IOC_GETFLAGS: |
1240 | return (zpl_ioctl_getflags(filp, (void *)arg)); | |
1241 | case FS_IOC_SETFLAGS: | |
1242 | return (zpl_ioctl_setflags(filp, (void *)arg)); | |
9c5167d1 NF |
1243 | case ZFS_IOC_FSGETXATTR: |
1244 | return (zpl_ioctl_getxattr(filp, (void *)arg)); | |
1245 | case ZFS_IOC_FSSETXATTR: | |
1246 | return (zpl_ioctl_setxattr(filp, (void *)arg)); | |
39a4daf7 US |
1247 | case ZFS_IOC_GETDOSFLAGS: |
1248 | return (zpl_ioctl_getdosflags(filp, (void *)arg)); | |
1249 | case ZFS_IOC_SETDOSFLAGS: | |
1250 | return (zpl_ioctl_setdosflags(filp, (void *)arg)); | |
88c28395 BB |
1251 | default: |
1252 | return (-ENOTTY); | |
1253 | } | |
1254 | } | |
1255 | ||
#ifdef CONFIG_COMPAT
/*
 * compat_ioctl entry point: translate the 32-bit GETVERSION, GETFLAGS
 * and SETFLAGS command numbers to their native equivalents and forward
 * to zpl_ioctl() with the argument converted by compat_ptr(). Every
 * other command is rejected with -ENOTTY.
 */
static long
zpl_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	unsigned int native_cmd;

	if (cmd == FS_IOC32_GETVERSION)
		native_cmd = FS_IOC_GETVERSION;
	else if (cmd == FS_IOC32_GETFLAGS)
		native_cmd = FS_IOC_GETFLAGS;
	else if (cmd == FS_IOC32_SETFLAGS)
		native_cmd = FS_IOC_SETFLAGS;
	else
		return (-ENOTTY);

	return (zpl_ioctl(filp, native_cmd, (unsigned long)compat_ptr(arg)));
}
#endif /* CONFIG_COMPAT */
1276 | ||
1277 | ||
1efb473f | 1278 | const struct address_space_operations zpl_address_space_operations = { |
7d524c06 | 1279 | #ifdef HAVE_VFS_READPAGES |
dde471ef | 1280 | .readpages = zpl_readpages, |
7d524c06 RS |
1281 | #else |
1282 | .readahead = zpl_readahead, | |
1283 | #endif | |
c2c2e7bb BB |
1284 | #ifdef HAVE_VFS_READ_FOLIO |
1285 | .read_folio = zpl_read_folio, | |
1286 | #else | |
1efb473f | 1287 | .readpage = zpl_readpage, |
c2c2e7bb | 1288 | #endif |
1efb473f | 1289 | .writepage = zpl_writepage, |
d1d7e268 | 1290 | .writepages = zpl_writepages, |
a584ef26 | 1291 | .direct_IO = zpl_direct_IO, |
1c24bf96 CK |
1292 | #ifdef HAVE_VFS_SET_PAGE_DIRTY_NOBUFFERS |
1293 | .set_page_dirty = __set_page_dirty_nobuffers, | |
1294 | #endif | |
7dde17e8 SP |
1295 | #ifdef HAVE_VFS_FILEMAP_DIRTY_FOLIO |
1296 | .dirty_folio = filemap_dirty_folio, | |
1297 | #endif | |
1efb473f BB |
1298 | }; |
1299 | ||
1300 | const struct file_operations zpl_file_operations = { | |
126400a1 BB |
1301 | .open = zpl_open, |
1302 | .release = zpl_release, | |
802e7b5f | 1303 | .llseek = zpl_llseek, |
57ae8400 | 1304 | #ifdef HAVE_VFS_RW_ITERATE |
7a789346 CC |
1305 | #ifdef HAVE_NEW_SYNC_READ |
1306 | .read = new_sync_read, | |
1307 | .write = new_sync_write, | |
1308 | #endif | |
57ae8400 MK |
1309 | .read_iter = zpl_iter_read, |
1310 | .write_iter = zpl_iter_write, | |
1c2358c1 BB |
1311 | #ifdef HAVE_VFS_IOV_ITER |
1312 | .splice_read = generic_file_splice_read, | |
1313 | .splice_write = iter_file_splice_write, | |
1314 | #endif | |
57ae8400 | 1315 | #else |
7a789346 CC |
1316 | .read = do_sync_read, |
1317 | .write = do_sync_write, | |
cd3939c5 RY |
1318 | .aio_read = zpl_aio_read, |
1319 | .aio_write = zpl_aio_write, | |
57ae8400 | 1320 | #endif |
c0d35759 | 1321 | .mmap = zpl_mmap, |
1efb473f | 1322 | .fsync = zpl_fsync, |
7ca25051 | 1323 | #ifdef HAVE_FILE_AIO_FSYNC |
cd3939c5 | 1324 | .aio_fsync = zpl_aio_fsync, |
7ca25051 | 1325 | #endif |
d1d7e268 | 1326 | .fallocate = zpl_fallocate, |
320f0c60 FY |
1327 | #ifdef HAVE_FILE_FADVISE |
1328 | .fadvise = zpl_fadvise, | |
1329 | #endif | |
d1d7e268 | 1330 | .unlocked_ioctl = zpl_ioctl, |
88c28395 | 1331 | #ifdef CONFIG_COMPAT |
d1d7e268 | 1332 | .compat_ioctl = zpl_compat_ioctl, |
88c28395 | 1333 | #endif |
1efb473f BB |
1334 | }; |
1335 | ||
1336 | const struct file_operations zpl_dir_file_operations = { | |
1337 | .llseek = generic_file_llseek, | |
1338 | .read = generic_read_dir, | |
9464b959 | 1339 | #if defined(HAVE_VFS_ITERATE_SHARED) |
9baaa7de CC |
1340 | .iterate_shared = zpl_iterate, |
1341 | #elif defined(HAVE_VFS_ITERATE) | |
0f37d0c8 RY |
1342 | .iterate = zpl_iterate, |
1343 | #else | |
1efb473f | 1344 | .readdir = zpl_readdir, |
0f37d0c8 | 1345 | #endif |
1efb473f | 1346 | .fsync = zpl_fsync, |
88c28395 BB |
1347 | .unlocked_ioctl = zpl_ioctl, |
1348 | #ifdef CONFIG_COMPAT | |
1349 | .compat_ioctl = zpl_compat_ioctl, | |
1350 | #endif | |
1efb473f | 1351 | }; |
f734301d | 1352 | |
7ada752a | 1353 | /* CSTYLED */ |
f734301d AD |
1354 | module_param(zfs_fallocate_reserve_percent, uint, 0644); |
1355 | MODULE_PARM_DESC(zfs_fallocate_reserve_percent, | |
7ada752a | 1356 | "Percentage of length to use for the available capacity check"); |