/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2011, Lawrence Livermore National Security, LLC.
 * Copyright (c) 2015 by Chunwei Chen. All rights reserved.
 */

#ifdef CONFIG_COMPAT
#include <linux/compat.h>
#endif
#include <sys/dmu_objset.h>
#include <sys/zfs_vfsops.h>
#include <sys/zfs_vnops.h>
#include <sys/zfs_znode.h>
#include <sys/zpl.h>

static int
zpl_open(struct inode *ip, struct file *filp)
{
	cred_t *cr = CRED();
	int error;
	fstrans_cookie_t cookie;

	error = generic_file_open(ip, filp);
	if (error)
		return (error);

	crhold(cr);
	cookie = spl_fstrans_mark();
	error = -zfs_open(ip, filp->f_mode, filp->f_flags, cr);
	spl_fstrans_unmark(cookie);
	crfree(cr);
	ASSERT3S(error, <=, 0);

	return (error);
}

static int
zpl_release(struct inode *ip, struct file *filp)
{
	cred_t *cr = CRED();
	int error;
	fstrans_cookie_t cookie;

	cookie = spl_fstrans_mark();
	if (ITOZ(ip)->z_atime_dirty)
		zfs_mark_inode_dirty(ip);

	crhold(cr);
	error = -zfs_close(ip, filp->f_flags, cr);
	spl_fstrans_unmark(cookie);
	crfree(cr);
	ASSERT3S(error, <=, 0);

	return (error);
}

static int
zpl_iterate(struct file *filp, struct dir_context *ctx)
{
	cred_t *cr = CRED();
	int error;
	fstrans_cookie_t cookie;

	crhold(cr);
	cookie = spl_fstrans_mark();
	error = -zfs_readdir(file_inode(filp), ctx, cr);
	spl_fstrans_unmark(cookie);
	crfree(cr);
	ASSERT3S(error, <=, 0);

	return (error);
}

#if !defined(HAVE_VFS_ITERATE) && !defined(HAVE_VFS_ITERATE_SHARED)
static int
zpl_readdir(struct file *filp, void *dirent, filldir_t filldir)
{
	struct dir_context ctx = DIR_CONTEXT_INIT(dirent, filldir, filp->f_pos);
	int error;

	error = zpl_iterate(filp, &ctx);
	filp->f_pos = ctx.pos;

	return (error);
}
#endif /* HAVE_VFS_ITERATE */

#if defined(HAVE_FSYNC_WITH_DENTRY)
/*
 * Linux 2.6.x - 2.6.34 API,
 * Through 2.6.34 the nfsd kernel server would pass a NULL 'file struct *'
 * to the fops->fsync() hook. For this reason, we must be careful not to
 * use filp unconditionally.
 */
static int
zpl_fsync(struct file *filp, struct dentry *dentry, int datasync)
{
	cred_t *cr = CRED();
	int error;
	fstrans_cookie_t cookie;

	crhold(cr);
	cookie = spl_fstrans_mark();
	error = -zfs_fsync(dentry->d_inode, datasync, cr);
	spl_fstrans_unmark(cookie);
	crfree(cr);
	ASSERT3S(error, <=, 0);

	return (error);
}

#ifdef HAVE_FILE_AIO_FSYNC
static int
zpl_aio_fsync(struct kiocb *kiocb, int datasync)
{
	struct file *filp = kiocb->ki_filp;
	return (zpl_fsync(filp, file_dentry(filp), datasync));
}
#endif

#elif defined(HAVE_FSYNC_WITHOUT_DENTRY)
/*
 * Linux 2.6.35 - 3.0 API,
 * As of 2.6.35 the dentry argument to the fops->fsync() hook was deemed
 * redundant. The dentry is still accessible via filp->f_path.dentry,
 * and we are guaranteed that filp will never be NULL.
 */
static int
zpl_fsync(struct file *filp, int datasync)
{
	struct inode *inode = filp->f_mapping->host;
	cred_t *cr = CRED();
	int error;
	fstrans_cookie_t cookie;

	crhold(cr);
	cookie = spl_fstrans_mark();
	error = -zfs_fsync(inode, datasync, cr);
	spl_fstrans_unmark(cookie);
	crfree(cr);
	ASSERT3S(error, <=, 0);

	return (error);
}

#ifdef HAVE_FILE_AIO_FSYNC
static int
zpl_aio_fsync(struct kiocb *kiocb, int datasync)
{
	return (zpl_fsync(kiocb->ki_filp, datasync));
}
#endif

#elif defined(HAVE_FSYNC_RANGE)
/*
 * Linux 3.1 - 3.x API,
 * As of 3.1 the responsibility to call filemap_write_and_wait_range() has
 * been pushed down into the .fsync() vfs hook. Additionally, the i_mutex
 * lock is no longer held by the caller. ZFS does not require the lock to
 * be held, so we do not acquire it.
 */
static int
zpl_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
{
	struct inode *inode = filp->f_mapping->host;
	cred_t *cr = CRED();
	int error;
	fstrans_cookie_t cookie;

	error = filemap_write_and_wait_range(inode->i_mapping, start, end);
	if (error)
		return (error);

	crhold(cr);
	cookie = spl_fstrans_mark();
	error = -zfs_fsync(inode, datasync, cr);
	spl_fstrans_unmark(cookie);
	crfree(cr);
	ASSERT3S(error, <=, 0);

	return (error);
}

#ifdef HAVE_FILE_AIO_FSYNC
static int
zpl_aio_fsync(struct kiocb *kiocb, int datasync)
{
	return (zpl_fsync(kiocb->ki_filp, kiocb->ki_pos, -1, datasync));
}
#endif

#else
#error "Unsupported fops->fsync() implementation"
#endif
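
/*
 * Illustrative only, not part of this file's interfaces: from userspace,
 * fsync(2) reaches the hook above with datasync == 0 and fdatasync(2)
 * with datasync != 0. A minimal sketch (hypothetical path):
 *
 *	int fd = open("/tank/file", O_WRONLY);
 *	write(fd, buf, len);
 *	fsync(fd);		// zpl_fsync(..., datasync = 0)
 *	fdatasync(fd);		// zpl_fsync(..., datasync = 1)
 */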

static ssize_t
zpl_read_common_iovec(struct inode *ip, const struct iovec *iovp, size_t count,
    unsigned long nr_segs, loff_t *ppos, uio_seg_t segment, int flags,
    cred_t *cr, size_t skip)
{
	ssize_t read;
	uio_t uio;
	int error;
	fstrans_cookie_t cookie;

	uio.uio_iov = iovp;
	uio.uio_skip = skip;
	uio.uio_resid = count;
	uio.uio_iovcnt = nr_segs;
	uio.uio_loffset = *ppos;
	uio.uio_limit = MAXOFFSET_T;
	uio.uio_segflg = segment;

	cookie = spl_fstrans_mark();
	error = -zfs_read(ip, &uio, flags, cr);
	spl_fstrans_unmark(cookie);
	if (error < 0)
		return (error);

	read = count - uio.uio_resid;
	*ppos += read;
	task_io_account_read(read);

	return (read);
}
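
/*
 * Worked example (illustrative, not compiled): for a plain
 * read(fd, buf, 4096) at offset 0, zpl_read() below builds a single
 * iovec and this function sees:
 *
 *	uio.uio_iovcnt  = 1;		// one iovec covering buf
 *	uio.uio_resid   = 4096;		// bytes still to transfer
 *	uio.uio_loffset = 0;		// starting file offset
 *
 * If zfs_read() returns with uio_resid == 1024, then
 * read = 4096 - 1024 = 3072 bytes are reported back and *ppos
 * advances to 3072.
 */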

inline ssize_t
zpl_read_common(struct inode *ip, const char *buf, size_t len, loff_t *ppos,
    uio_seg_t segment, int flags, cred_t *cr)
{
	struct iovec iov;

	iov.iov_base = (void *)buf;
	iov.iov_len = len;

	return (zpl_read_common_iovec(ip, &iov, len, 1, ppos, segment,
	    flags, cr, 0));
}

static ssize_t
zpl_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
{
	cred_t *cr = CRED();
	ssize_t read;

	crhold(cr);
	read = zpl_read_common(filp->f_mapping->host, buf, len, ppos,
	    UIO_USERSPACE, filp->f_flags, cr);
	crfree(cr);

	file_accessed(filp);
	return (read);
}

static ssize_t
zpl_iter_read_common(struct kiocb *kiocb, const struct iovec *iovp,
    unsigned long nr_segs, size_t count, uio_seg_t seg, size_t skip)
{
	cred_t *cr = CRED();
	struct file *filp = kiocb->ki_filp;
	ssize_t read;

	crhold(cr);
	read = zpl_read_common_iovec(filp->f_mapping->host, iovp, count,
	    nr_segs, &kiocb->ki_pos, seg, filp->f_flags, cr, skip);
	crfree(cr);

	file_accessed(filp);
	return (read);
}

#if defined(HAVE_VFS_RW_ITERATE)
static ssize_t
zpl_iter_read(struct kiocb *kiocb, struct iov_iter *to)
{
	ssize_t ret;
	uio_seg_t seg = UIO_USERSPACE;
	if (to->type & ITER_KVEC)
		seg = UIO_SYSSPACE;
	if (to->type & ITER_BVEC)
		seg = UIO_BVEC;
	ret = zpl_iter_read_common(kiocb, to->iov, to->nr_segs,
	    iov_iter_count(to), seg, to->iov_offset);
	if (ret > 0)
		iov_iter_advance(to, ret);
	return (ret);
}
#else
static ssize_t
zpl_aio_read(struct kiocb *kiocb, const struct iovec *iovp,
    unsigned long nr_segs, loff_t pos)
{
	return (zpl_iter_read_common(kiocb, iovp, nr_segs, kiocb->ki_nbytes,
	    UIO_USERSPACE, 0));
}
#endif /* HAVE_VFS_RW_ITERATE */

static ssize_t
zpl_write_common_iovec(struct inode *ip, const struct iovec *iovp, size_t count,
    unsigned long nr_segs, loff_t *ppos, uio_seg_t segment, int flags,
    cred_t *cr, size_t skip)
{
	ssize_t wrote;
	uio_t uio;
	int error;
	fstrans_cookie_t cookie;

	if (flags & O_APPEND)
		*ppos = i_size_read(ip);

	uio.uio_iov = iovp;
	uio.uio_skip = skip;
	uio.uio_resid = count;
	uio.uio_iovcnt = nr_segs;
	uio.uio_loffset = *ppos;
	uio.uio_limit = MAXOFFSET_T;
	uio.uio_segflg = segment;

	cookie = spl_fstrans_mark();
	error = -zfs_write(ip, &uio, flags, cr);
	spl_fstrans_unmark(cookie);
	if (error < 0)
		return (error);

	wrote = count - uio.uio_resid;
	*ppos += wrote;
	task_io_account_write(wrote);

	return (wrote);
}

inline ssize_t
zpl_write_common(struct inode *ip, const char *buf, size_t len, loff_t *ppos,
    uio_seg_t segment, int flags, cred_t *cr)
{
	struct iovec iov;

	iov.iov_base = (void *)buf;
	iov.iov_len = len;

	return (zpl_write_common_iovec(ip, &iov, len, 1, ppos, segment,
	    flags, cr, 0));
}

static ssize_t
zpl_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
{
	cred_t *cr = CRED();
	ssize_t wrote;

	crhold(cr);
	wrote = zpl_write_common(filp->f_mapping->host, buf, len, ppos,
	    UIO_USERSPACE, filp->f_flags, cr);
	crfree(cr);

	return (wrote);
}

static ssize_t
zpl_iter_write_common(struct kiocb *kiocb, const struct iovec *iovp,
    unsigned long nr_segs, size_t count, uio_seg_t seg, size_t skip)
{
	cred_t *cr = CRED();
	struct file *filp = kiocb->ki_filp;
	ssize_t wrote;

	crhold(cr);
	wrote = zpl_write_common_iovec(filp->f_mapping->host, iovp, count,
	    nr_segs, &kiocb->ki_pos, seg, filp->f_flags, cr, skip);
	crfree(cr);

	return (wrote);
}

#if defined(HAVE_VFS_RW_ITERATE)
static ssize_t
zpl_iter_write(struct kiocb *kiocb, struct iov_iter *from)
{
	ssize_t ret;
	uio_seg_t seg = UIO_USERSPACE;
	if (from->type & ITER_KVEC)
		seg = UIO_SYSSPACE;
	if (from->type & ITER_BVEC)
		seg = UIO_BVEC;
	ret = zpl_iter_write_common(kiocb, from->iov, from->nr_segs,
	    iov_iter_count(from), seg, from->iov_offset);
	if (ret > 0)
		iov_iter_advance(from, ret);
	return (ret);
}
#else
static ssize_t
zpl_aio_write(struct kiocb *kiocb, const struct iovec *iovp,
    unsigned long nr_segs, loff_t pos)
{
	return (zpl_iter_write_common(kiocb, iovp, nr_segs, kiocb->ki_nbytes,
	    UIO_USERSPACE, 0));
}
#endif /* HAVE_VFS_RW_ITERATE */

static loff_t
zpl_llseek(struct file *filp, loff_t offset, int whence)
{
#if defined(SEEK_HOLE) && defined(SEEK_DATA)
	fstrans_cookie_t cookie;

	if (whence == SEEK_DATA || whence == SEEK_HOLE) {
		struct inode *ip = filp->f_mapping->host;
		loff_t maxbytes = ip->i_sb->s_maxbytes;
		loff_t error;

		spl_inode_lock_shared(ip);
		cookie = spl_fstrans_mark();
		error = -zfs_holey(ip, whence, &offset);
		spl_fstrans_unmark(cookie);
		if (error == 0)
			error = lseek_execute(filp, ip, offset, maxbytes);
		spl_inode_unlock_shared(ip);

		return (error);
	}
#endif /* SEEK_HOLE && SEEK_DATA */

	return (generic_file_llseek(filp, offset, whence));
}
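
/*
 * Illustrative userspace sketch (hypothetical fd, not part of this
 * module): SEEK_DATA and SEEK_HOLE let tools walk the data extents of
 * a sparse file instead of reading every byte:
 *
 *	off_t data = lseek(fd, 0, SEEK_DATA);	  // first data byte >= 0
 *	off_t hole = lseek(fd, data, SEEK_HOLE);  // end of that extent
 *
 * Both whence values land in zfs_holey() above; everything else falls
 * through to generic_file_llseek().
 */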

/*
 * It's worth taking a moment to describe how mmap is implemented
 * for zfs because it differs considerably from other Linux filesystems.
 * However, this issue is handled the same way under OpenSolaris.
 *
 * The issue is that by design zfs bypasses the Linux page cache and
 * leaves all caching up to the ARC. This has been shown to work
 * well for the common read(2)/write(2) case. However, mmap(2)
 * is problematic because it relies on being tightly integrated with
 * the page cache. To handle this we cache mmap'ed files twice, once
 * in the ARC and a second time in the page cache. The code is careful
 * to keep both copies synchronized.
 *
 * When a file with an mmap'ed region is written to using write(2)
 * both the data in the ARC and existing pages in the page cache
 * are updated. For a read(2) data will be read first from the page
 * cache then the ARC if needed. Neither a write(2) nor a read(2)
 * will ever result in new pages being added to the page cache.
 *
 * New pages are added to the page cache only via .readpage() which
 * is called when the vfs needs to read a page off disk to back the
 * virtual memory region. These pages may be modified without
 * notifying the ARC and will be written out periodically via
 * .writepage(). This will occur due to either a sync or the usual
 * page aging behavior. Note that because a read(2) of a mmap'ed file
 * will always check the page cache first, correct data will still be
 * returned even when the ARC is out of date.
 *
 * While this implementation ensures correct behavior it does have
 * some drawbacks. The most obvious of which is that it increases
 * the required memory footprint when accessing mmap'ed files. It
 * also adds additional complexity to the code, keeping both caches
 * synchronized.
 *
 * Longer term it may be possible to cleanly resolve this wart by
 * mapping page cache pages directly on to the ARC buffers. The
 * Linux address space operations are flexible enough to allow
 * selection of which pages back a particular index. The trick
 * would be working out the details of which subsystem is in
 * charge, the ARC, the page cache, or both. It may also prove
 * helpful to move the ARC buffers to scatter-gather lists
 * rather than a vmalloc'ed region.
 */
static int
zpl_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct inode *ip = filp->f_mapping->host;
	znode_t *zp = ITOZ(ip);
	int error;
	fstrans_cookie_t cookie;

	cookie = spl_fstrans_mark();
	error = -zfs_map(ip, vma->vm_pgoff, (caddr_t *)vma->vm_start,
	    (size_t)(vma->vm_end - vma->vm_start), vma->vm_flags);
	spl_fstrans_unmark(cookie);
	if (error)
		return (error);

	error = generic_file_mmap(filp, vma);
	if (error)
		return (error);

	mutex_enter(&zp->z_lock);
	zp->z_is_mapped = 1;
	mutex_exit(&zp->z_lock);

	return (error);
}
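
/*
 * Illustrative only: the double caching described above is what keeps
 * a hypothetical sequence like the following coherent, because
 * zfs_write() updates any existing page cache pages for the mapped
 * range in addition to the ARC copy:
 *
 *	char *p = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
 *	pwrite(fd, buf, n, 0);		// updates ARC and page cache
 *	memcmp(p, buf, n);		// mapping observes the new data
 */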

/*
 * Populate a page with data for the Linux page cache. This function is
 * only used to support mmap(2). There will be an identical copy of the
 * data in the ARC which is kept up to date via .write() and .writepage().
 *
 * Currently this function relies on zpl_read_common() and the O_DIRECT
 * flag to read in a page. This works, but the more correct approach is
 * to update zfs_fillpage() to be Linux friendly and use that interface.
 */
static int
zpl_readpage(struct file *filp, struct page *pp)
{
	struct inode *ip;
	struct page *pl[1];
	int error = 0;
	fstrans_cookie_t cookie;

	ASSERT(PageLocked(pp));
	ip = pp->mapping->host;
	pl[0] = pp;

	cookie = spl_fstrans_mark();
	error = -zfs_getpage(ip, pl, 1);
	spl_fstrans_unmark(cookie);

	if (error) {
		SetPageError(pp);
		ClearPageUptodate(pp);
	} else {
		ClearPageError(pp);
		SetPageUptodate(pp);
		flush_dcache_page(pp);
	}

	unlock_page(pp);
	return (error);
}

/*
 * Populate a set of pages with data for the Linux page cache. This
 * function will only be called for read ahead and never for demand
 * paging. For simplicity, the code relies on read_cache_pages() to
 * correctly lock each page for IO and call zpl_readpage().
 */
static int
zpl_readpages(struct file *filp, struct address_space *mapping,
    struct list_head *pages, unsigned nr_pages)
{
	return (read_cache_pages(mapping, pages,
	    (filler_t *)zpl_readpage, filp));
}

int
zpl_putpage(struct page *pp, struct writeback_control *wbc, void *data)
{
	struct address_space *mapping = data;
	fstrans_cookie_t cookie;

	ASSERT(PageLocked(pp));
	ASSERT(!PageWriteback(pp));

	cookie = spl_fstrans_mark();
	(void) zfs_putpage(mapping->host, pp, wbc);
	spl_fstrans_unmark(cookie);

	return (0);
}

static int
zpl_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
	znode_t *zp = ITOZ(mapping->host);
	zfs_sb_t *zsb = ITOZSB(mapping->host);
	enum writeback_sync_modes sync_mode;
	int result;

	ZFS_ENTER(zsb);
	if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS)
		wbc->sync_mode = WB_SYNC_ALL;
	ZFS_EXIT(zsb);
	sync_mode = wbc->sync_mode;

	/*
	 * We don't want to run write_cache_pages() in SYNC mode here, because
	 * that would make putpage() wait for a single page to be committed to
	 * disk every single time, resulting in atrocious performance. Instead
	 * we run it once in non-SYNC mode so that the ZIL gets all the data,
	 * and then we commit it all in one go.
	 */
	wbc->sync_mode = WB_SYNC_NONE;
	result = write_cache_pages(mapping, wbc, zpl_putpage, mapping);
	if (sync_mode != wbc->sync_mode) {
		ZFS_ENTER(zsb);
		ZFS_VERIFY_ZP(zp);
		if (zsb->z_log != NULL)
			zil_commit(zsb->z_log, zp->z_id);
		ZFS_EXIT(zsb);

		/*
		 * We need to call write_cache_pages() again (we can't just
		 * return after the commit) because the previous call in
		 * non-SYNC mode does not guarantee that we got all the dirty
		 * pages (see the implementation of write_cache_pages() for
		 * details). That being said, this is a no-op in most cases.
		 */
		wbc->sync_mode = sync_mode;
		result = write_cache_pages(mapping, wbc, zpl_putpage, mapping);
	}
	return (result);
}

/*
 * Write out dirty pages to the ARC. This function is only required to
 * support mmap(2). Mapped pages may be dirtied by memory operations
 * which never call .write(). These dirty pages are kept in sync with
 * the ARC buffers via this hook.
 */
static int
zpl_writepage(struct page *pp, struct writeback_control *wbc)
{
	if (ITOZSB(pp->mapping->host)->z_os->os_sync == ZFS_SYNC_ALWAYS)
		wbc->sync_mode = WB_SYNC_ALL;

	return (zpl_putpage(pp, wbc, pp->mapping));
}

/*
 * The only flag combination which matches the behavior of zfs_space()
 * is FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE. The FALLOC_FL_PUNCH_HOLE
 * flag was introduced in the 2.6.38 kernel.
 */
#if defined(HAVE_FILE_FALLOCATE) || defined(HAVE_INODE_FALLOCATE)
long
zpl_fallocate_common(struct inode *ip, int mode, loff_t offset, loff_t len)
{
	int error = -EOPNOTSUPP;

#if defined(FALLOC_FL_PUNCH_HOLE) && defined(FALLOC_FL_KEEP_SIZE)
	cred_t *cr = CRED();
	flock64_t bf;
	loff_t olen;
	fstrans_cookie_t cookie;

	if (mode != (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
		return (error);

	if (offset < 0 || len <= 0)
		return (-EINVAL);

	spl_inode_lock(ip);
	olen = i_size_read(ip);

	if (offset > olen) {
		spl_inode_unlock(ip);
		return (0);
	}
	if (offset + len > olen)
		len = olen - offset;
	bf.l_type = F_WRLCK;
	bf.l_whence = 0;
	bf.l_start = offset;
	bf.l_len = len;
	bf.l_pid = 0;

	crhold(cr);
	cookie = spl_fstrans_mark();
	error = -zfs_space(ip, F_FREESP, &bf, FWRITE, offset, cr);
	spl_fstrans_unmark(cookie);
	spl_inode_unlock(ip);

	crfree(cr);
#endif /* defined(FALLOC_FL_PUNCH_HOLE) && defined(FALLOC_FL_KEEP_SIZE) */

	ASSERT3S(error, <=, 0);
	return (error);
}
#endif /* defined(HAVE_FILE_FALLOCATE) || defined(HAVE_INODE_FALLOCATE) */

#ifdef HAVE_FILE_FALLOCATE
static long
zpl_fallocate(struct file *filp, int mode, loff_t offset, loff_t len)
{
	return (zpl_fallocate_common(file_inode(filp), mode, offset, len));
}
#endif /* HAVE_FILE_FALLOCATE */
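
/*
 * Illustrative userspace sketch (hypothetical fd): hole punching
 * without changing the file size is the only mode accepted above,
 * so:
 *
 *	fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
 *	    offset, len);		// frees the range via zfs_space()
 *	fallocate(fd, 0, offset, len);	// preallocation: -EOPNOTSUPP
 */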

/*
 * Map zfs file z_pflags (xvattr_t) to linux file attributes. Only file
 * attributes common to both Linux and Solaris are mapped.
 */
static int
zpl_ioctl_getflags(struct file *filp, void __user *arg)
{
	struct inode *ip = file_inode(filp);
	unsigned int ioctl_flags = 0;
	uint64_t zfs_flags = ITOZ(ip)->z_pflags;

	if (zfs_flags & ZFS_IMMUTABLE)
		ioctl_flags |= FS_IMMUTABLE_FL;

	if (zfs_flags & ZFS_APPENDONLY)
		ioctl_flags |= FS_APPEND_FL;

	if (zfs_flags & ZFS_NODUMP)
		ioctl_flags |= FS_NODUMP_FL;

	ioctl_flags &= FS_FL_USER_VISIBLE;

	/*
	 * copy_to_user() returns the number of bytes it could not copy,
	 * not a negative errno, so translate any failure to -EFAULT.
	 */
	if (copy_to_user(arg, &ioctl_flags, sizeof (ioctl_flags)))
		return (-EFAULT);

	return (0);
}
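
/*
 * Illustrative only: this is the hook behind lsattr(1). A minimal
 * userspace equivalent (hypothetical fd) would be:
 *
 *	unsigned int flags;
 *	ioctl(fd, FS_IOC_GETFLAGS, &flags);
 *	if (flags & FS_IMMUTABLE_FL)	// ZFS_IMMUTABLE on disk
 *		printf("immutable\n");
 */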

/*
 * fchange() is a helper macro to detect if we have been asked to change a
 * flag. This is ugly, but the requirement that we do this is a consequence of
 * how the Linux file attribute interface was designed. Another consequence is
 * that concurrent modification of files suffers from a TOCTOU race. Neither
 * are things we can fix without modifying the kernel-userland interface, which
 * is outside of our jurisdiction.
 */

#define	fchange(f0, f1, b0, b1) (!((f0) & (b0)) != !((f1) & (b1)))
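
/*
 * Worked example: with the file currently immutable (zfs_flags has
 * ZFS_IMMUTABLE set) and a request that clears FS_IMMUTABLE_FL:
 *
 *	fchange(ioctl_flags, zfs_flags, FS_IMMUTABLE_FL, ZFS_IMMUTABLE)
 *	  == (!(0) != !(ZFS_IMMUTABLE)) == (1 != 0) == 1
 *
 * i.e. the flag is changing, so CAP_LINUX_IMMUTABLE is required below.
 */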

static int
zpl_ioctl_setflags(struct file *filp, void __user *arg)
{
	struct inode *ip = file_inode(filp);
	uint64_t zfs_flags = ITOZ(ip)->z_pflags;
	unsigned int ioctl_flags;
	cred_t *cr = CRED();
	xvattr_t xva;
	xoptattr_t *xoap;
	int error;
	fstrans_cookie_t cookie;

	if (copy_from_user(&ioctl_flags, arg, sizeof (ioctl_flags)))
		return (-EFAULT);

	if ((ioctl_flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NODUMP_FL)))
		return (-EOPNOTSUPP);

	if ((ioctl_flags & ~(FS_FL_USER_MODIFIABLE)))
		return (-EACCES);

	if ((fchange(ioctl_flags, zfs_flags, FS_IMMUTABLE_FL, ZFS_IMMUTABLE) ||
	    fchange(ioctl_flags, zfs_flags, FS_APPEND_FL, ZFS_APPENDONLY)) &&
	    !capable(CAP_LINUX_IMMUTABLE))
		return (-EACCES);

	if (!zpl_inode_owner_or_capable(ip))
		return (-EACCES);

	xva_init(&xva);
	xoap = xva_getxoptattr(&xva);

	XVA_SET_REQ(&xva, XAT_IMMUTABLE);
	if (ioctl_flags & FS_IMMUTABLE_FL)
		xoap->xoa_immutable = B_TRUE;

	XVA_SET_REQ(&xva, XAT_APPENDONLY);
	if (ioctl_flags & FS_APPEND_FL)
		xoap->xoa_appendonly = B_TRUE;

	XVA_SET_REQ(&xva, XAT_NODUMP);
	if (ioctl_flags & FS_NODUMP_FL)
		xoap->xoa_nodump = B_TRUE;

	crhold(cr);
	cookie = spl_fstrans_mark();
	error = -zfs_setattr(ip, (vattr_t *)&xva, 0, cr);
	spl_fstrans_unmark(cookie);
	crfree(cr);

	return (error);
}

static long
zpl_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	switch (cmd) {
	case FS_IOC_GETFLAGS:
		return (zpl_ioctl_getflags(filp, (void *)arg));
	case FS_IOC_SETFLAGS:
		return (zpl_ioctl_setflags(filp, (void *)arg));
	default:
		return (-ENOTTY);
	}
}

#ifdef CONFIG_COMPAT
static long
zpl_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	switch (cmd) {
	case FS_IOC32_GETFLAGS:
		cmd = FS_IOC_GETFLAGS;
		break;
	case FS_IOC32_SETFLAGS:
		cmd = FS_IOC_SETFLAGS;
		break;
	default:
		return (-ENOTTY);
	}
	return (zpl_ioctl(filp, cmd, (unsigned long)compat_ptr(arg)));
}
#endif /* CONFIG_COMPAT */

const struct address_space_operations zpl_address_space_operations = {
	.readpages	= zpl_readpages,
	.readpage	= zpl_readpage,
	.writepage	= zpl_writepage,
	.writepages	= zpl_writepages,
};

const struct file_operations zpl_file_operations = {
	.open		= zpl_open,
	.release	= zpl_release,
	.llseek		= zpl_llseek,
	.read		= zpl_read,
	.write		= zpl_write,
#ifdef HAVE_VFS_RW_ITERATE
	.read_iter	= zpl_iter_read,
	.write_iter	= zpl_iter_write,
#else
	.aio_read	= zpl_aio_read,
	.aio_write	= zpl_aio_write,
#endif
	.mmap		= zpl_mmap,
	.fsync		= zpl_fsync,
#ifdef HAVE_FILE_AIO_FSYNC
	.aio_fsync	= zpl_aio_fsync,
#endif
#ifdef HAVE_FILE_FALLOCATE
	.fallocate	= zpl_fallocate,
#endif /* HAVE_FILE_FALLOCATE */
	.unlocked_ioctl	= zpl_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= zpl_compat_ioctl,
#endif
};

const struct file_operations zpl_dir_file_operations = {
	.llseek		= generic_file_llseek,
	.read		= generic_read_dir,
#ifdef HAVE_VFS_ITERATE_SHARED
	.iterate_shared	= zpl_iterate,
#elif defined(HAVE_VFS_ITERATE)
	.iterate	= zpl_iterate,
#else
	.readdir	= zpl_readdir,
#endif
	.fsync		= zpl_fsync,
	.unlocked_ioctl	= zpl_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= zpl_compat_ioctl,
#endif
};