/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2011, Lawrence Livermore National Security, LLC.
 * Copyright (c) 2015 by Chunwei Chen. All rights reserved.
 */


#ifdef CONFIG_COMPAT
#include <linux/compat.h>
#endif
#include <sys/dmu_objset.h>
#include <sys/zfs_vfsops.h>
#include <sys/zfs_vnops.h>
#include <sys/zfs_znode.h>
#include <sys/zpl.h>

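/*
 * Open a file.  Perform the generic VFS open checks, then pass the open
 * mode and flags to zfs_open() under the caller's credentials.  Errors
 * are returned as negative errno values.
 */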
static int
zpl_open(struct inode *ip, struct file *filp)
{
        cred_t *cr = CRED();
        int error;
        fstrans_cookie_t cookie;

        error = generic_file_open(ip, filp);
        if (error)
                return (error);

        crhold(cr);
        cookie = spl_fstrans_mark();
        error = -zfs_open(ip, filp->f_mode, filp->f_flags, cr);
        spl_fstrans_unmark(cookie);
        crfree(cr);
        ASSERT3S(error, <=, 0);

        return (error);
}

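/*
 * Release a file.  If the inode's atime is dirty the inode is marked
 * dirty so the update is written back, then zfs_close() is notified of
 * the release under the caller's credentials.
 */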
static int
zpl_release(struct inode *ip, struct file *filp)
{
        cred_t *cr = CRED();
        int error;
        fstrans_cookie_t cookie;

        cookie = spl_fstrans_mark();
        if (ITOZ(ip)->z_atime_dirty)
                zfs_mark_inode_dirty(ip);

        crhold(cr);
        error = -zfs_close(ip, filp->f_flags, cr);
        spl_fstrans_unmark(cookie);
        crfree(cr);
        ASSERT3S(error, <=, 0);

        return (error);
}

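/*
 * Directory iteration.  Entries are emitted through the supplied
 * dir_context by zfs_readdir() under the caller's credentials.
 */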
static int
zpl_iterate(struct file *filp, struct dir_context *ctx)
{
        cred_t *cr = CRED();
        int error;
        fstrans_cookie_t cookie;

        crhold(cr);
        cookie = spl_fstrans_mark();
        error = -zfs_readdir(file_inode(filp), ctx, cr);
        spl_fstrans_unmark(cookie);
        crfree(cr);
        ASSERT3S(error, <=, 0);

        return (error);
}

#if !defined(HAVE_VFS_ITERATE) && !defined(HAVE_VFS_ITERATE_SHARED)
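/*
 * For kernels which predate the ->iterate() and ->iterate_shared()
 * interfaces.  A dir_context is built from the legacy readdir()
 * arguments so the common zpl_iterate() path can be shared.
 */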
static int
zpl_readdir(struct file *filp, void *dirent, filldir_t filldir)
{
        struct dir_context ctx = DIR_CONTEXT_INIT(dirent, filldir, filp->f_pos);
        int error;

        error = zpl_iterate(filp, &ctx);
        filp->f_pos = ctx.pos;

        return (error);
}
#endif /* HAVE_VFS_ITERATE */

#if defined(HAVE_FSYNC_WITH_DENTRY)
/*
 * Linux 2.6.x - 2.6.34 API,
 * Through 2.6.34 the nfsd kernel server would pass a NULL 'file struct *'
 * to the fops->fsync() hook.  For this reason, we must be careful not to
 * use filp unconditionally.
 */
static int
zpl_fsync(struct file *filp, struct dentry *dentry, int datasync)
{
        cred_t *cr = CRED();
        int error;
        fstrans_cookie_t cookie;

        crhold(cr);
        cookie = spl_fstrans_mark();
        error = -zfs_fsync(dentry->d_inode, datasync, cr);
        spl_fstrans_unmark(cookie);
        crfree(cr);
        ASSERT3S(error, <=, 0);

        return (error);
}

#ifdef HAVE_FILE_AIO_FSYNC
static int
zpl_aio_fsync(struct kiocb *kiocb, int datasync)
{
        struct file *filp = kiocb->ki_filp;
        return (zpl_fsync(filp, file_dentry(filp), datasync));
}
#endif

#elif defined(HAVE_FSYNC_WITHOUT_DENTRY)
/*
 * Linux 2.6.35 - 3.0 API,
 * As of 2.6.35 the dentry argument to the fops->fsync() hook was deemed
 * redundant.  The dentry is still accessible via filp->f_path.dentry,
 * and we are guaranteed that filp will never be NULL.
 */
static int
zpl_fsync(struct file *filp, int datasync)
{
        struct inode *inode = filp->f_mapping->host;
        cred_t *cr = CRED();
        int error;
        fstrans_cookie_t cookie;

        crhold(cr);
        cookie = spl_fstrans_mark();
        error = -zfs_fsync(inode, datasync, cr);
        spl_fstrans_unmark(cookie);
        crfree(cr);
        ASSERT3S(error, <=, 0);

        return (error);
}

#ifdef HAVE_FILE_AIO_FSYNC
static int
zpl_aio_fsync(struct kiocb *kiocb, int datasync)
{
        return (zpl_fsync(kiocb->ki_filp, datasync));
}
#endif

#elif defined(HAVE_FSYNC_RANGE)
/*
 * Linux 3.1 - 3.x API,
 * As of 3.1 the responsibility to call filemap_write_and_wait_range() has
 * been pushed down into the .fsync() vfs hook.  Additionally, the i_mutex
 * lock is no longer held by the caller.  ZFS does not require the lock to
 * be held, so we do not acquire it.
 */
static int
zpl_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
{
        struct inode *inode = filp->f_mapping->host;
        cred_t *cr = CRED();
        int error;
        fstrans_cookie_t cookie;

        error = filemap_write_and_wait_range(inode->i_mapping, start, end);
        if (error)
                return (error);

        crhold(cr);
        cookie = spl_fstrans_mark();
        error = -zfs_fsync(inode, datasync, cr);
        spl_fstrans_unmark(cookie);
        crfree(cr);
        ASSERT3S(error, <=, 0);

        return (error);
}

#ifdef HAVE_FILE_AIO_FSYNC
static int
zpl_aio_fsync(struct kiocb *kiocb, int datasync)
{
        return (zpl_fsync(kiocb->ki_filp, kiocb->ki_pos, -1, datasync));
}
#endif

#else
#error "Unsupported fops->fsync() implementation"
#endif

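/*
 * Common read path.  The iovec and position are wrapped in a uio_t and
 * handed to zfs_read().  On success the file position is advanced and
 * the number of bytes read is returned; on failure a negative errno is
 * returned.
 */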
static ssize_t
zpl_read_common_iovec(struct inode *ip, const struct iovec *iovp, size_t count,
    unsigned long nr_segs, loff_t *ppos, uio_seg_t segment, int flags,
    cred_t *cr, size_t skip)
{
        ssize_t read;
        uio_t uio;
        int error;
        fstrans_cookie_t cookie;

        uio.uio_iov = iovp;
        uio.uio_skip = skip;
        uio.uio_resid = count;
        uio.uio_iovcnt = nr_segs;
        uio.uio_loffset = *ppos;
        uio.uio_limit = MAXOFFSET_T;
        uio.uio_segflg = segment;

        cookie = spl_fstrans_mark();
        error = -zfs_read(ip, &uio, flags, cr);
        spl_fstrans_unmark(cookie);
        if (error < 0)
                return (error);

        read = count - uio.uio_resid;
        *ppos += read;
        task_io_account_read(read);

        return (read);
}

inline ssize_t
zpl_read_common(struct inode *ip, const char *buf, size_t len, loff_t *ppos,
    uio_seg_t segment, int flags, cred_t *cr)
{
        struct iovec iov;

        iov.iov_base = (void *)buf;
        iov.iov_len = len;

        return (zpl_read_common_iovec(ip, &iov, len, 1, ppos, segment,
            flags, cr, 0));
}

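/*
 * Shared implementation for the .read_iter()/.aio_read() entry points.
 * The read is issued against the file's inode at kiocb->ki_pos using the
 * caller's credentials, and the access time is updated via
 * file_accessed().
 */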
static ssize_t
zpl_iter_read_common(struct kiocb *kiocb, const struct iovec *iovp,
    unsigned long nr_segs, size_t count, uio_seg_t seg, size_t skip)
{
        cred_t *cr = CRED();
        struct file *filp = kiocb->ki_filp;
        ssize_t read;

        crhold(cr);
        read = zpl_read_common_iovec(filp->f_mapping->host, iovp, count,
            nr_segs, &kiocb->ki_pos, seg, filp->f_flags, cr, skip);
        crfree(cr);

        file_accessed(filp);
        return (read);
}

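/*
 * Newer kernels provide the ->read_iter() interface; the iov_iter type
 * is mapped to the matching uio segment flag (user, kernel, or bvec)
 * before calling into the common read path.  Older kernels use the
 * ->aio_read() interface instead.
 */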
#if defined(HAVE_VFS_RW_ITERATE)
static ssize_t
zpl_iter_read(struct kiocb *kiocb, struct iov_iter *to)
{
        ssize_t ret;
        uio_seg_t seg = UIO_USERSPACE;
        if (to->type & ITER_KVEC)
                seg = UIO_SYSSPACE;
        if (to->type & ITER_BVEC)
                seg = UIO_BVEC;
        ret = zpl_iter_read_common(kiocb, to->iov, to->nr_segs,
            iov_iter_count(to), seg, to->iov_offset);
        if (ret > 0)
                iov_iter_advance(to, ret);
        return (ret);
}
#else
static ssize_t
zpl_aio_read(struct kiocb *kiocb, const struct iovec *iovp,
    unsigned long nr_segs, loff_t pos)
{
        ssize_t ret;
        size_t count;

        ret = generic_segment_checks(iovp, &nr_segs, &count, VERIFY_WRITE);
        if (ret)
                return (ret);

        return (zpl_iter_read_common(kiocb, iovp, nr_segs, count,
            UIO_USERSPACE, 0));
}
#endif /* HAVE_VFS_RW_ITERATE */

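/*
 * Common write path.  For O_APPEND writes the position is first reset to
 * the current end of file.  The iovec is wrapped in a uio_t and handed
 * to zfs_write(); on success the file position is advanced and the
 * number of bytes written is returned.
 */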
static ssize_t
zpl_write_common_iovec(struct inode *ip, const struct iovec *iovp, size_t count,
    unsigned long nr_segs, loff_t *ppos, uio_seg_t segment, int flags,
    cred_t *cr, size_t skip)
{
        ssize_t wrote;
        uio_t uio;
        int error;
        fstrans_cookie_t cookie;

        if (flags & O_APPEND)
                *ppos = i_size_read(ip);

        uio.uio_iov = iovp;
        uio.uio_skip = skip;
        uio.uio_resid = count;
        uio.uio_iovcnt = nr_segs;
        uio.uio_loffset = *ppos;
        uio.uio_limit = MAXOFFSET_T;
        uio.uio_segflg = segment;

        cookie = spl_fstrans_mark();
        error = -zfs_write(ip, &uio, flags, cr);
        spl_fstrans_unmark(cookie);
        if (error < 0)
                return (error);

        wrote = count - uio.uio_resid;
        *ppos += wrote;
        task_io_account_write(wrote);

        return (wrote);
}

inline ssize_t
zpl_write_common(struct inode *ip, const char *buf, size_t len, loff_t *ppos,
    uio_seg_t segment, int flags, cred_t *cr)
{
        struct iovec iov;

        iov.iov_base = (void *)buf;
        iov.iov_len = len;

        return (zpl_write_common_iovec(ip, &iov, len, 1, ppos, segment,
            flags, cr, 0));
}

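/*
 * Shared implementation for the .write_iter()/.aio_write() entry points.
 * The write is issued against the file's inode at kiocb->ki_pos using
 * the caller's credentials.
 */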
static ssize_t
zpl_iter_write_common(struct kiocb *kiocb, const struct iovec *iovp,
    unsigned long nr_segs, size_t count, uio_seg_t seg, size_t skip)
{
        cred_t *cr = CRED();
        struct file *filp = kiocb->ki_filp;
        ssize_t wrote;

        crhold(cr);
        wrote = zpl_write_common_iovec(filp->f_mapping->host, iovp, count,
            nr_segs, &kiocb->ki_pos, seg, filp->f_flags, cr, skip);
        crfree(cr);

        return (wrote);
}

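/*
 * As with reads, newer kernels provide ->write_iter() and older kernels
 * provide ->aio_write().  In both cases generic_write_checks() is used
 * to validate and clamp the requested write before calling into the
 * common write path.
 */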
#if defined(HAVE_VFS_RW_ITERATE)
static ssize_t
zpl_iter_write(struct kiocb *kiocb, struct iov_iter *from)
{
        size_t count;
        ssize_t ret;
        uio_seg_t seg = UIO_USERSPACE;

#ifndef HAVE_GENERIC_WRITE_CHECKS_KIOCB
        struct file *file = kiocb->ki_filp;
        struct address_space *mapping = file->f_mapping;
        struct inode *ip = mapping->host;
        int isblk = S_ISBLK(ip->i_mode);

        count = iov_iter_count(from);
        ret = generic_write_checks(file, &kiocb->ki_pos, &count, isblk);
#else
        /*
         * XXX - ideally this check should be in the same lock region as
         * the write operation, so that there's no TOCTTOU race when an
         * append races with another process growing the file.
         */
        ret = generic_write_checks(kiocb, from);
        count = ret;
#endif
        if (ret <= 0)
                return (ret);

        if (from->type & ITER_KVEC)
                seg = UIO_SYSSPACE;
        if (from->type & ITER_BVEC)
                seg = UIO_BVEC;

        ret = zpl_iter_write_common(kiocb, from->iov, from->nr_segs,
            count, seg, from->iov_offset);
        if (ret > 0)
                iov_iter_advance(from, ret);

        return (ret);
}
#else
static ssize_t
zpl_aio_write(struct kiocb *kiocb, const struct iovec *iovp,
    unsigned long nr_segs, loff_t pos)
{
        struct file *file = kiocb->ki_filp;
        struct address_space *mapping = file->f_mapping;
        struct inode *ip = mapping->host;
        int isblk = S_ISBLK(ip->i_mode);
        size_t count;
        ssize_t ret;

        ret = generic_segment_checks(iovp, &nr_segs, &count, VERIFY_READ);
        if (ret)
                return (ret);

        ret = generic_write_checks(file, &pos, &count, isblk);
        if (ret)
                return (ret);

        return (zpl_iter_write_common(kiocb, iovp, nr_segs, count,
            UIO_USERSPACE, 0));
}
#endif /* HAVE_VFS_RW_ITERATE */

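/*
 * Seek within a file.  SEEK_HOLE and SEEK_DATA are resolved by
 * zfs_holey() with the inode lock held shared; all other whence values
 * fall through to generic_file_llseek().
 */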
static loff_t
zpl_llseek(struct file *filp, loff_t offset, int whence)
{
#if defined(SEEK_HOLE) && defined(SEEK_DATA)
        fstrans_cookie_t cookie;

        if (whence == SEEK_DATA || whence == SEEK_HOLE) {
                struct inode *ip = filp->f_mapping->host;
                loff_t maxbytes = ip->i_sb->s_maxbytes;
                loff_t error;

                spl_inode_lock_shared(ip);
                cookie = spl_fstrans_mark();
                error = -zfs_holey(ip, whence, &offset);
                spl_fstrans_unmark(cookie);
                if (error == 0)
                        error = lseek_execute(filp, ip, offset, maxbytes);
                spl_inode_unlock_shared(ip);

                return (error);
        }
#endif /* SEEK_HOLE && SEEK_DATA */

        return (generic_file_llseek(filp, offset, whence));
}

/*
 * It's worth taking a moment to describe how mmap is implemented
 * for zfs because it differs considerably from other Linux filesystems.
 * However, this issue is handled the same way under OpenSolaris.
 *
 * The issue is that by design zfs bypasses the Linux page cache and
 * leaves all caching up to the ARC.  This has been shown to work
 * well for the common read(2)/write(2) case.  However, mmap(2) is
 * a problem because it relies on being tightly integrated with the
 * page cache.  To handle this we cache mmap'ed files twice, once in
 * the ARC and a second time in the page cache.  The code is careful
 * to keep both copies synchronized.
 *
 * When a file with an mmap'ed region is written to using write(2)
 * both the data in the ARC and existing pages in the page cache
 * are updated.  For a read(2) data will be read first from the page
 * cache then the ARC if needed.  Neither a write(2) nor a read(2)
 * will ever result in new pages being added to the page cache.
 *
 * New pages are added to the page cache only via .readpage() which
 * is called when the vfs needs to read a page off disk to back the
 * virtual memory region.  These pages may be modified without
 * notifying the ARC and will be written out periodically via
 * .writepage().  This will occur due to either a sync or the usual
 * page aging behavior.  Note that because a read(2) of an mmap'ed file
 * will always check the page cache first, correct data will still be
 * returned even when the ARC is out of date.
 *
 * While this implementation ensures correct behavior it does have
 * some drawbacks.  The most obvious of which is that it increases
 * the required memory footprint when accessing mmap'ed files.  It
 * also adds additional complexity to the code keeping both caches
 * synchronized.
 *
 * Longer term it may be possible to cleanly resolve this wart by
 * mapping page cache pages directly on to the ARC buffers.  The
 * Linux address space operations are flexible enough to allow
 * selection of which pages back a particular index.  The trick
 * would be working out the details of which subsystem is in
 * charge, the ARC, the page cache, or both.  It may also prove
 * helpful to move the ARC buffers to scatter-gather lists
 * rather than a vmalloc'ed region.
 */
static int
zpl_mmap(struct file *filp, struct vm_area_struct *vma)
{
        struct inode *ip = filp->f_mapping->host;
        znode_t *zp = ITOZ(ip);
        int error;
        fstrans_cookie_t cookie;

        cookie = spl_fstrans_mark();
        error = -zfs_map(ip, vma->vm_pgoff, (caddr_t *)vma->vm_start,
            (size_t)(vma->vm_end - vma->vm_start), vma->vm_flags);
        spl_fstrans_unmark(cookie);
        if (error)
                return (error);

        error = generic_file_mmap(filp, vma);
        if (error)
                return (error);

        mutex_enter(&zp->z_lock);
        zp->z_is_mapped = 1;
        mutex_exit(&zp->z_lock);

        return (error);
}

/*
 * Populate a page with data for the Linux page cache.  This function is
 * only used to support mmap(2).  There will be an identical copy of the
 * data in the ARC which is kept up to date via .write() and .writepage().
 *
 * Currently this function relies on zpl_read_common() and the O_DIRECT
 * flag to read in a page.  This works but the more correct way is to
 * update zfs_fillpage() to be Linux friendly and use that interface.
 */
static int
zpl_readpage(struct file *filp, struct page *pp)
{
        struct inode *ip;
        struct page *pl[1];
        int error = 0;
        fstrans_cookie_t cookie;

        ASSERT(PageLocked(pp));
        ip = pp->mapping->host;
        pl[0] = pp;

        cookie = spl_fstrans_mark();
        error = -zfs_getpage(ip, pl, 1);
        spl_fstrans_unmark(cookie);

        if (error) {
                SetPageError(pp);
                ClearPageUptodate(pp);
        } else {
                ClearPageError(pp);
                SetPageUptodate(pp);
                flush_dcache_page(pp);
        }

        unlock_page(pp);
        return (error);
}

/*
 * Populate a set of pages with data for the Linux page cache.  This
 * function will only be called for read ahead and never for demand
 * paging.  For simplicity, the code relies on read_cache_pages() to
 * correctly lock each page for IO and call zpl_readpage().
 */
static int
zpl_readpages(struct file *filp, struct address_space *mapping,
    struct list_head *pages, unsigned nr_pages)
{
        return (read_cache_pages(mapping, pages,
            (filler_t *)zpl_readpage, filp));
}

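/*
 * Write a single dirty page back to the ARC.  This is the per-page
 * callback used by write_cache_pages() and by zpl_writepage().
 */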
int
zpl_putpage(struct page *pp, struct writeback_control *wbc, void *data)
{
        struct address_space *mapping = data;
        fstrans_cookie_t cookie;

        ASSERT(PageLocked(pp));
        ASSERT(!PageWriteback(pp));

        cookie = spl_fstrans_mark();
        (void) zfs_putpage(mapping->host, pp, wbc);
        spl_fstrans_unmark(cookie);

        return (0);
}

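/*
 * Write back a range of dirty pages.  When the dataset is configured
 * with sync=always the writeback is upgraded to WB_SYNC_ALL so the data
 * is committed to stable storage before returning.
 */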
static int
zpl_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
        znode_t *zp = ITOZ(mapping->host);
        zfs_sb_t *zsb = ITOZSB(mapping->host);
        enum writeback_sync_modes sync_mode;
        int result;

        ZFS_ENTER(zsb);
        if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS)
                wbc->sync_mode = WB_SYNC_ALL;
        ZFS_EXIT(zsb);
        sync_mode = wbc->sync_mode;

        /*
         * We don't want to run write_cache_pages() in SYNC mode here, because
         * that would make putpage() wait for a single page to be committed to
         * disk every single time, resulting in atrocious performance.  Instead
         * we run it once in non-SYNC mode so that the ZIL gets all the data,
         * and then we commit it all in one go.
         */
        wbc->sync_mode = WB_SYNC_NONE;
        result = write_cache_pages(mapping, wbc, zpl_putpage, mapping);
        if (sync_mode != wbc->sync_mode) {
                ZFS_ENTER(zsb);
                ZFS_VERIFY_ZP(zp);
                if (zsb->z_log != NULL)
                        zil_commit(zsb->z_log, zp->z_id);
                ZFS_EXIT(zsb);

                /*
                 * We need to call write_cache_pages() again (we can't just
                 * return after the commit) because the previous call in
                 * non-SYNC mode does not guarantee that we got all the dirty
                 * pages (see the implementation of write_cache_pages() for
                 * details).  That being said, this is a no-op in most cases.
                 */
                wbc->sync_mode = sync_mode;
                result = write_cache_pages(mapping, wbc, zpl_putpage, mapping);
        }
        return (result);
}

/*
 * Write out dirty pages to the ARC.  This function is only required to
 * support mmap(2).  Mapped pages may be dirtied by memory operations
 * which never call .write().  These dirty pages are kept in sync with
 * the ARC buffers via this hook.
 */
static int
zpl_writepage(struct page *pp, struct writeback_control *wbc)
{
        if (ITOZSB(pp->mapping->host)->z_os->os_sync == ZFS_SYNC_ALWAYS)
                wbc->sync_mode = WB_SYNC_ALL;

        return (zpl_putpage(pp, wbc, pp->mapping));
}

/*
 * The only flag combination which matches the behavior of zfs_space()
 * is FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE.  The FALLOC_FL_PUNCH_HOLE
 * flag was introduced in the 2.6.38 kernel.
 */
#if defined(HAVE_FILE_FALLOCATE) || defined(HAVE_INODE_FALLOCATE)
long
zpl_fallocate_common(struct inode *ip, int mode, loff_t offset, loff_t len)
{
        int error = -EOPNOTSUPP;

#if defined(FALLOC_FL_PUNCH_HOLE) && defined(FALLOC_FL_KEEP_SIZE)
        cred_t *cr = CRED();
        flock64_t bf;
        loff_t olen;
        fstrans_cookie_t cookie;

        if (mode != (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
                return (error);

        if (offset < 0 || len <= 0)
                return (-EINVAL);

        spl_inode_lock(ip);
        olen = i_size_read(ip);

        if (offset > olen) {
                spl_inode_unlock(ip);
                return (0);
        }
        if (offset + len > olen)
                len = olen - offset;
        bf.l_type = F_WRLCK;
        bf.l_whence = 0;
        bf.l_start = offset;
        bf.l_len = len;
        bf.l_pid = 0;

        crhold(cr);
        cookie = spl_fstrans_mark();
        error = -zfs_space(ip, F_FREESP, &bf, FWRITE, offset, cr);
        spl_fstrans_unmark(cookie);
        spl_inode_unlock(ip);

        crfree(cr);
#endif /* defined(FALLOC_FL_PUNCH_HOLE) && defined(FALLOC_FL_KEEP_SIZE) */

        ASSERT3S(error, <=, 0);
        return (error);
}
#endif /* defined(HAVE_FILE_FALLOCATE) || defined(HAVE_INODE_FALLOCATE) */

#ifdef HAVE_FILE_FALLOCATE
static long
zpl_fallocate(struct file *filp, int mode, loff_t offset, loff_t len)
{
        return zpl_fallocate_common(file_inode(filp),
            mode, offset, len);
}
#endif /* HAVE_FILE_FALLOCATE */

/*
 * Map zfs file z_pflags (xvattr_t) to Linux file attributes.  Only file
 * attributes common to both Linux and Solaris are mapped.
 */
static int
zpl_ioctl_getflags(struct file *filp, void __user *arg)
{
        struct inode *ip = file_inode(filp);
        unsigned int ioctl_flags = 0;
        uint64_t zfs_flags = ITOZ(ip)->z_pflags;

        if (zfs_flags & ZFS_IMMUTABLE)
                ioctl_flags |= FS_IMMUTABLE_FL;

        if (zfs_flags & ZFS_APPENDONLY)
                ioctl_flags |= FS_APPEND_FL;

        if (zfs_flags & ZFS_NODUMP)
                ioctl_flags |= FS_NODUMP_FL;

        ioctl_flags &= FS_FL_USER_VISIBLE;

        if (copy_to_user(arg, &ioctl_flags, sizeof (ioctl_flags)))
                return (-EFAULT);

        return (0);
}


/*
 * fchange() is a helper macro to detect if we have been asked to change a
 * flag.  This is ugly, but the requirement that we do this is a consequence of
 * how the Linux file attribute interface was designed.  Another consequence is
 * that concurrent modification of files suffers from a TOCTOU race.  Neither
 * are things we can fix without modifying the kernel-userland interface, which
 * is outside of our jurisdiction.
 */

#define fchange(f0, f1, b0, b1) (!((f0) & (b0)) != !((f1) & (b1)))

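/*
 * Map the supported Linux FS_*_FL file attributes back on to their zfs
 * counterparts (xvattr_t) and apply them with zfs_setattr().  Only
 * FS_IMMUTABLE_FL, FS_APPEND_FL and FS_NODUMP_FL are accepted, and
 * changing the immutable or append-only flags requires
 * CAP_LINUX_IMMUTABLE.
 */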
static int
zpl_ioctl_setflags(struct file *filp, void __user *arg)
{
        struct inode *ip = file_inode(filp);
        uint64_t zfs_flags = ITOZ(ip)->z_pflags;
        unsigned int ioctl_flags;
        cred_t *cr = CRED();
        xvattr_t xva;
        xoptattr_t *xoap;
        int error;
        fstrans_cookie_t cookie;

        if (copy_from_user(&ioctl_flags, arg, sizeof (ioctl_flags)))
                return (-EFAULT);

        if ((ioctl_flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NODUMP_FL)))
                return (-EOPNOTSUPP);

        if ((ioctl_flags & ~(FS_FL_USER_MODIFIABLE)))
                return (-EACCES);

        if ((fchange(ioctl_flags, zfs_flags, FS_IMMUTABLE_FL, ZFS_IMMUTABLE) ||
            fchange(ioctl_flags, zfs_flags, FS_APPEND_FL, ZFS_APPENDONLY)) &&
            !capable(CAP_LINUX_IMMUTABLE))
                return (-EACCES);

        if (!zpl_inode_owner_or_capable(ip))
                return (-EACCES);

        xva_init(&xva);
        xoap = xva_getxoptattr(&xva);

        XVA_SET_REQ(&xva, XAT_IMMUTABLE);
        if (ioctl_flags & FS_IMMUTABLE_FL)
                xoap->xoa_immutable = B_TRUE;

        XVA_SET_REQ(&xva, XAT_APPENDONLY);
        if (ioctl_flags & FS_APPEND_FL)
                xoap->xoa_appendonly = B_TRUE;

        XVA_SET_REQ(&xva, XAT_NODUMP);
        if (ioctl_flags & FS_NODUMP_FL)
                xoap->xoa_nodump = B_TRUE;

        crhold(cr);
        cookie = spl_fstrans_mark();
        error = -zfs_setattr(ip, (vattr_t *)&xva, 0, cr);
        spl_fstrans_unmark(cookie);
        crfree(cr);

        return (error);
}

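/*
 * Entry point for ioctl(2) on zfs files.  Only the generic file flag
 * ioctls are handled here; everything else returns -ENOTTY.
 */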
static long
zpl_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
        switch (cmd) {
        case FS_IOC_GETFLAGS:
                return (zpl_ioctl_getflags(filp, (void *)arg));
        case FS_IOC_SETFLAGS:
                return (zpl_ioctl_setflags(filp, (void *)arg));
        default:
                return (-ENOTTY);
        }
}

#ifdef CONFIG_COMPAT
static long
zpl_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
        switch (cmd) {
        case FS_IOC32_GETFLAGS:
                cmd = FS_IOC_GETFLAGS;
                break;
        case FS_IOC32_SETFLAGS:
                cmd = FS_IOC_SETFLAGS;
                break;
        default:
                return (-ENOTTY);
        }
        return (zpl_ioctl(filp, cmd, (unsigned long)compat_ptr(arg)));
}
#endif /* CONFIG_COMPAT */


const struct address_space_operations zpl_address_space_operations = {
        .readpages      = zpl_readpages,
        .readpage       = zpl_readpage,
        .writepage      = zpl_writepage,
        .writepages     = zpl_writepages,
};

const struct file_operations zpl_file_operations = {
        .open           = zpl_open,
        .release        = zpl_release,
        .llseek         = zpl_llseek,
#ifdef HAVE_VFS_RW_ITERATE
        .read_iter      = zpl_iter_read,
        .write_iter     = zpl_iter_write,
#else
        .aio_read       = zpl_aio_read,
        .aio_write      = zpl_aio_write,
#endif
        .mmap           = zpl_mmap,
        .fsync          = zpl_fsync,
#ifdef HAVE_FILE_AIO_FSYNC
        .aio_fsync      = zpl_aio_fsync,
#endif
#ifdef HAVE_FILE_FALLOCATE
        .fallocate      = zpl_fallocate,
#endif /* HAVE_FILE_FALLOCATE */
        .unlocked_ioctl = zpl_ioctl,
#ifdef CONFIG_COMPAT
        .compat_ioctl   = zpl_compat_ioctl,
#endif
};

const struct file_operations zpl_dir_file_operations = {
        .llseek         = generic_file_llseek,
        .read           = generic_read_dir,
#ifdef HAVE_VFS_ITERATE_SHARED
        .iterate_shared = zpl_iterate,
#elif defined(HAVE_VFS_ITERATE)
        .iterate        = zpl_iterate,
#else
        .readdir        = zpl_readdir,
#endif
        .fsync          = zpl_fsync,
        .unlocked_ioctl = zpl_ioctl,
#ifdef CONFIG_COMPAT
        .compat_ioctl   = zpl_compat_ioctl,
#endif
};